* [PATCH v3 01/21] x86/fpu/xstate: Modify initialization helper to handle both static and dynamic buffers
       [not found] <20201223155717.19556-1-chang.seok.bae@intel.com>
@ 2020-12-23 15:56 ` Chang S. Bae
  2021-01-15 12:40   ` Borislav Petkov
  2020-12-23 15:56 ` [PATCH v3 03/21] x86/fpu/xstate: Modify address finders " Chang S. Bae
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Chang S. Bae @ 2020-12-23 15:56 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, dave.hansen, jing2.liu, ravi.v.shankar, linux-kernel,
	chang.seok.bae, kvm

In preparation for dynamic xstate buffer expansion, update the buffer
initialization function parameters to equally handle static in-line xstate
buffer, as well as dynamically allocated xstate buffer.

init_fpstate is a special case, which is indicated by a null pointer
parameter to fpstate_init().

Also, fpstate_init_xstate() now accepts the state component bitmap to
configure XCOMP_BV for the compacted format.

No functional change.
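
For illustration, a minimal sketch of the resulting calling convention
(not part of the diff below):

	/* Initialize a task's xstate buffer: */
	fpstate_init(&current->thread.fpu);

	/* Initialize init_fpstate, indicated by a null pointer: */
	fpstate_init(NULL);

	/* XCOMP_BV is now configured from an explicit component bitmap: */
	fpstate_init_xstate(&init_fpstate.xsave, xfeatures_mask_all);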

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v2:
* Updated the changelog with task->fpu removed. (Boris Petkov)
---
 arch/x86/include/asm/fpu/internal.h |  6 +++---
 arch/x86/kernel/fpu/core.c          | 14 +++++++++++---
 arch/x86/kernel/fpu/init.c          |  2 +-
 arch/x86/kernel/fpu/regset.c        |  2 +-
 arch/x86/kernel/fpu/xstate.c        |  3 +--
 arch/x86/kvm/x86.c                  |  2 +-
 6 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 8d33ad80704f..d81d8c407dc0 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -80,20 +80,20 @@ static __always_inline __pure bool use_fxsr(void)
 
 extern union fpregs_state init_fpstate;
 
-extern void fpstate_init(union fpregs_state *state);
+extern void fpstate_init(struct fpu *fpu);
 #ifdef CONFIG_MATH_EMULATION
 extern void fpstate_init_soft(struct swregs_state *soft);
 #else
 static inline void fpstate_init_soft(struct swregs_state *soft) {}
 #endif
 
-static inline void fpstate_init_xstate(struct xregs_state *xsave)
+static inline void fpstate_init_xstate(struct xregs_state *xsave, u64 xcomp_mask)
 {
 	/*
 	 * XRSTORS requires these bits set in xcomp_bv, or it will
 	 * trigger #GP:
 	 */
-	xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
+	xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xcomp_mask;
 }
 
 static inline void fpstate_init_fxstate(struct fxregs_state *fx)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index eb86a2b831b1..f23e5ffbb307 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -191,8 +191,16 @@ static inline void fpstate_init_fstate(struct fregs_state *fp)
 	fp->fos = 0xffff0000u;
 }
 
-void fpstate_init(union fpregs_state *state)
+/* A null pointer parameter indicates init_fpstate. */
+void fpstate_init(struct fpu *fpu)
 {
+	union fpregs_state *state;
+
+	if (fpu)
+		state = &fpu->state;
+	else
+		state = &init_fpstate;
+
 	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		fpstate_init_soft(&state->soft);
 		return;
@@ -201,7 +209,7 @@ void fpstate_init(union fpregs_state *state)
 	memset(state, 0, fpu_kernel_xstate_size);
 
 	if (static_cpu_has(X86_FEATURE_XSAVES))
-		fpstate_init_xstate(&state->xsave);
+		fpstate_init_xstate(&state->xsave, xfeatures_mask_all);
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
 	else
@@ -261,7 +269,7 @@ static void fpu__initialize(struct fpu *fpu)
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
 	set_thread_flag(TIF_NEED_FPU_LOAD);
-	fpstate_init(&fpu->state);
+	fpstate_init(fpu);
 	trace_x86_fpu_init_state(fpu);
 }
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 701f196d7c68..74e03e3bc20f 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -124,7 +124,7 @@ static void __init fpu__init_system_generic(void)
 	 * Set up the legacy init FPU context. (xstate init might overwrite this
 	 * with a more modern format, if the CPU supports it.)
 	 */
-	fpstate_init(&init_fpstate);
+	fpstate_init(NULL);
 
 	fpu__init_system_mxcsr();
 }
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index c413756ba89f..4c4d9059ff36 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -144,7 +144,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	 * In case of failure, mark all states as init:
 	 */
 	if (ret)
-		fpstate_init(&fpu->state);
+		fpstate_init(fpu);
 
 	return ret;
 }
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 5d8047441a0a..1a3e5effe0fa 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -457,8 +457,7 @@ static void __init setup_init_fpu_buf(void)
 	print_xstate_features();
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
-						     xfeatures_mask_all;
+		fpstate_init_xstate(&init_fpstate.xsave, xfeatures_mask_all);
 
 	/*
 	 * Init all the features state with header.xfeatures being 0x0
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e545a8a613b1..45704f106815 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9820,7 +9820,7 @@ static int sync_regs(struct kvm_vcpu *vcpu)
 
 static void fx_init(struct kvm_vcpu *vcpu)
 {
-	fpstate_init(&vcpu->arch.guest_fpu->state);
+	fpstate_init(vcpu->arch.guest_fpu);
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
 			host_xcr0 | XSTATE_COMPACTION_ENABLED;
-- 
2.17.1



* [PATCH v3 03/21] x86/fpu/xstate: Modify address finders to handle both static and dynamic buffers
       [not found] <20201223155717.19556-1-chang.seok.bae@intel.com>
  2020-12-23 15:56 ` [PATCH v3 01/21] x86/fpu/xstate: Modify initialization helper to handle both static and dynamic buffers Chang S. Bae
@ 2020-12-23 15:56 ` Chang S. Bae
  2021-01-15 13:06   ` Borislav Petkov
  2020-12-23 15:57 ` [PATCH v3 04/21] x86/fpu/xstate: Modify context switch helpers " Chang S. Bae
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Chang S. Bae @ 2020-12-23 15:56 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, dave.hansen, jing2.liu, ravi.v.shankar, linux-kernel,
	chang.seok.bae, kvm

In preparation for dynamic xstate buffer expansion, update the buffer
address finder function parameters to equally handle static in-line xstate
buffer, as well as dynamically allocated xstate buffer.

init_fpstate is a special case, which is indicated by a null pointer
parameter to get_xsave_addr() and __raw_xsave_addr().

No functional change.
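
For illustration, a minimal sketch of the resulting calling convention
(not part of the diff below):

	struct pkru_state *pk;

	/* Look up a state component in a task's buffer: */
	pk = get_xsave_addr(&current->thread.fpu, XFEATURE_PKRU);

	/* Look it up in init_fpstate, indicated by a null pointer: */
	pk = get_xsave_addr(NULL, XFEATURE_PKRU);
	if (pk)
		pk->pkru = init_pkru_value;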

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v2:
* Updated the changelog with task->fpu removed. (Boris Petkov)

Changes from v1:
* Rebased on the upstream kernel (5.10)
---
 arch/x86/include/asm/fpu/internal.h |  2 +-
 arch/x86/include/asm/fpu/xstate.h   |  2 +-
 arch/x86/include/asm/pgtable.h      |  2 +-
 arch/x86/kernel/cpu/common.c        |  2 +-
 arch/x86/kernel/fpu/xstate.c        | 50 +++++++++++++++++++----------
 arch/x86/kvm/x86.c                  | 26 +++++++++------
 arch/x86/mm/pkeys.c                 |  2 +-
 7 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index d81d8c407dc0..0153c4d4ca77 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -579,7 +579,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 	 * return to userland e.g. for a copy_to_user() operation.
 	 */
 	if (current->mm) {
-		pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
+		pk = get_xsave_addr(new_fpu, XFEATURE_PKRU);
 		if (pk)
 			pkru_val = pk->pkru;
 	}
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index e0f1b22f53ce..24bf8d3f559a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -100,7 +100,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 extern void __init update_regset_xstate_info(unsigned int size,
 					     u64 xstate_mask);
 
-void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+void *get_xsave_addr(struct fpu *fpu, int xfeature_nr);
 const void *get_xsave_field_ptr(int xfeature_nr);
 int using_compacted_format(void);
 int xfeature_size(int xfeature_nr);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a02c67291cfc..83268b41444f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -141,7 +141,7 @@ static inline void write_pkru(u32 pkru)
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return;
 
-	pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU);
+	pk = get_xsave_addr(&current->thread.fpu, XFEATURE_PKRU);
 
 	/*
 	 * The PKRU value in xstate needs to be in sync with the value that is
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 35ad8480c464..860b19db208b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -478,7 +478,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 		return;
 
 	cr4_set_bits(X86_CR4_PKE);
-	pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+	pk = get_xsave_addr(NULL, XFEATURE_PKRU);
 	if (pk)
 		pk->pkru = init_pkru_value;
 	/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6156dad0feb6..2010c31d25e1 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -894,15 +894,24 @@ void fpu__resume_cpu(void)
  * Given an xstate feature nr, calculate where in the xsave
  * buffer the state is.  Callers should ensure that the buffer
  * is valid.
+ *
+ * A null pointer parameter indicates to use init_fpstate.
  */
-static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
+static void *__raw_xsave_addr(struct fpu *fpu, int xfeature_nr)
 {
+	void *xsave;
+
 	if (!xfeature_enabled(xfeature_nr)) {
 		WARN_ON_FPU(1);
 		return NULL;
 	}
 
-	return (void *)xsave + xstate_comp_offsets[xfeature_nr];
+	if (fpu)
+		xsave = &fpu->state.xsave;
+	else
+		xsave = &init_fpstate.xsave;
+
+	return xsave + xstate_comp_offsets[xfeature_nr];
 }
 /*
  * Given the xsave area and a state inside, this function returns the
@@ -915,15 +924,18 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
  * this will return NULL.
  *
  * Inputs:
- *	xstate: the thread's storage area for all FPU data
+ *	fpu: the thread's FPU data to reference xstate buffer(s).
+ *	     (A null pointer parameter indicates init_fpstate.)
  *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
  *	XFEATURE_SSE, etc...)
  * Output:
  *	address of the state in the xsave area, or NULL if the
  *	field is not present in the xsave buffer.
  */
-void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
+void *get_xsave_addr(struct fpu *fpu, int xfeature_nr)
 {
+	struct xregs_state *xsave;
+
 	/*
 	 * Do we even *have* xsave state?
 	 */
@@ -936,6 +948,12 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 	 */
 	WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
 		  "get of unsupported state");
+
+	if (fpu)
+		xsave = &fpu->state.xsave;
+	else
+		xsave = &init_fpstate.xsave;
+
 	/*
 	 * This assumes the last 'xsave*' instruction to
 	 * have requested that 'xfeature_nr' be saved.
@@ -950,7 +968,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
 		return NULL;
 
-	return __raw_xsave_addr(xsave, xfeature_nr);
+	return __raw_xsave_addr(fpu, xfeature_nr);
 }
 EXPORT_SYMBOL_GPL(get_xsave_addr);
 
@@ -981,7 +999,7 @@ const void *get_xsave_field_ptr(int xfeature_nr)
 	 */
 	fpu__save(fpu);
 
-	return get_xsave_addr(&fpu->state.xsave, xfeature_nr);
+	return get_xsave_addr(fpu, xfeature_nr);
 }
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
@@ -1116,7 +1134,7 @@ void copy_xstate_to_kernel(struct membuf to, struct fpu *fpu)
 		 * Copy only in-use xstates:
 		 */
 		if ((header.xfeatures >> i) & 1) {
-			void *src = __raw_xsave_addr(xsave, i);
+			void *src = __raw_xsave_addr(fpu, i);
 
 			copy_part(&to, &last, xstate_offsets[i],
 				  xstate_sizes[i], src);
@@ -1145,13 +1163,11 @@ int copy_kernel_to_xstate(struct fpu *fpu, const void *kbuf)
 	if (validate_user_xstate_header(&hdr))
 		return -EINVAL;
 
-	xsave = &fpu->state.xsave;
-
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		u64 mask = ((u64)1 << i);
 
 		if (hdr.xfeatures & mask) {
-			void *dst = __raw_xsave_addr(xsave, i);
+			void *dst = __raw_xsave_addr(fpu, i);
 
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
@@ -1160,6 +1176,8 @@ int copy_kernel_to_xstate(struct fpu *fpu, const void *kbuf)
 		}
 	}
 
+	xsave = &fpu->state.xsave;
+
 	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
 		offset = offsetof(struct fxregs_state, mxcsr);
 		size = MXCSR_AND_FLAGS_SIZE;
@@ -1202,13 +1220,11 @@ int copy_user_to_xstate(struct fpu *fpu, const void __user *ubuf)
 	if (validate_user_xstate_header(&hdr))
 		return -EINVAL;
 
-	xsave = &fpu->state.xsave;
-
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		u64 mask = ((u64)1 << i);
 
 		if (hdr.xfeatures & mask) {
-			void *dst = __raw_xsave_addr(xsave, i);
+			void *dst = __raw_xsave_addr(fpu, i);
 
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
@@ -1218,6 +1234,8 @@ int copy_user_to_xstate(struct fpu *fpu, const void __user *ubuf)
 		}
 	}
 
+	xsave = &fpu->state.xsave;
+
 	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
 		offset = offsetof(struct fxregs_state, mxcsr);
 		size = MXCSR_AND_FLAGS_SIZE;
@@ -1441,16 +1459,14 @@ void update_pasid(void)
 	} else {
 		struct fpu *fpu = &current->thread.fpu;
 		struct ia32_pasid_state *ppasid_state;
-		struct xregs_state *xsave;
 
 		/*
 		 * The CPU's xstate registers are not currently active. Just
 		 * update the PASID state in the memory buffer here. The
 		 * PASID MSR will be loaded when returning to user mode.
 		 */
-		xsave = &fpu->state.xsave;
-		xsave->header.xfeatures |= XFEATURE_MASK_PASID;
-		ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
+		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_PASID;
+		ppasid_state = get_xsave_addr(fpu, XFEATURE_PASID);
 		/*
 		 * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
 		 * won't be NULL and no need to check its value.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 45704f106815..09368201d9cc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4396,10 +4396,15 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 
 static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 {
-	struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
-	u64 xstate_bv = xsave->header.xfeatures;
+	struct xregs_state *xsave;
+	struct fpu *guest_fpu;
+	u64 xstate_bv;
 	u64 valid;
 
+	guest_fpu = vcpu->arch.guest_fpu;
+	xsave = &guest_fpu->state.xsave;
+	xstate_bv = xsave->header.xfeatures;
+
 	/*
 	 * Copy legacy XSAVE area, to avoid complications with CPUID
 	 * leaves 0 and 1 in the loop below.
@@ -4418,7 +4423,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 	while (valid) {
 		u64 xfeature_mask = valid & -valid;
 		int xfeature_nr = fls64(xfeature_mask) - 1;
-		void *src = get_xsave_addr(xsave, xfeature_nr);
+		void *src = get_xsave_addr(guest_fpu, xfeature_nr);
 
 		if (src) {
 			u32 size, offset, ecx, edx;
@@ -4438,10 +4443,14 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 
 static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 {
-	struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
 	u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
+	struct xregs_state *xsave;
+	struct fpu *guest_fpu;
 	u64 valid;
 
+	guest_fpu = vcpu->arch.guest_fpu;
+	xsave = &guest_fpu->state.xsave;
+
 	/*
 	 * Copy legacy XSAVE area, to avoid complications with CPUID
 	 * leaves 0 and 1 in the loop below.
@@ -4461,7 +4470,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 	while (valid) {
 		u64 xfeature_mask = valid & -valid;
 		int xfeature_nr = fls64(xfeature_mask) - 1;
-		void *dest = get_xsave_addr(xsave, xfeature_nr);
+		void *dest = get_xsave_addr(guest_fpu, xfeature_nr);
 
 		if (dest) {
 			u32 size, offset, ecx, edx;
@@ -10031,6 +10040,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vcpu->arch.apf.halted = false;
 
 	if (kvm_mpx_supported()) {
+		struct fpu *guest_fpu = vcpu->arch.guest_fpu;
 		void *mpx_state_buffer;
 
 		/*
@@ -10039,12 +10049,10 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		 */
 		if (init_event)
 			kvm_put_guest_fpu(vcpu);
-		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
-					XFEATURE_BNDREGS);
+		mpx_state_buffer = get_xsave_addr(guest_fpu, XFEATURE_BNDREGS);
 		if (mpx_state_buffer)
 			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
-		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
-					XFEATURE_BNDCSR);
+		mpx_state_buffer = get_xsave_addr(guest_fpu, XFEATURE_BNDCSR);
 		if (mpx_state_buffer)
 			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
 		if (init_event)
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 8873ed1438a9..772e8bc3d49d 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -177,7 +177,7 @@ static ssize_t init_pkru_write_file(struct file *file,
 		return -EINVAL;
 
 	WRITE_ONCE(init_pkru_value, new_init_pkru);
-	pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+	pk = get_xsave_addr(NULL, XFEATURE_PKRU);
 	if (!pk)
 		return -EINVAL;
 	pk->pkru = new_init_pkru;
-- 
2.17.1



* [PATCH v3 04/21] x86/fpu/xstate: Modify context switch helpers to handle both static and dynamic buffers
       [not found] <20201223155717.19556-1-chang.seok.bae@intel.com>
  2020-12-23 15:56 ` [PATCH v3 01/21] x86/fpu/xstate: Modify initialization helper to handle both static and dynamic buffers Chang S. Bae
  2020-12-23 15:56 ` [PATCH v3 03/21] x86/fpu/xstate: Modify address finders " Chang S. Bae
@ 2020-12-23 15:57 ` Chang S. Bae
  2021-01-15 13:18   ` Borislav Petkov
  2020-12-23 15:57 ` [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes Chang S. Bae
  2020-12-23 15:57 ` [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate Chang S. Bae
  4 siblings, 1 reply; 20+ messages in thread
From: Chang S. Bae @ 2020-12-23 15:57 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, dave.hansen, jing2.liu, ravi.v.shankar, linux-kernel,
	chang.seok.bae, kvm

In preparation for dynamic xstate buffer expansion, update the xstate
restore function parameters to equally handle static in-line xstate buffer,
as well as dynamically allocated xstate buffer.

No functional change.
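
For illustration, a minimal sketch of the resulting calling convention,
mirroring the context switch and signal restore paths (not part of the
diff below):

	struct fpu *fpu = &current->thread.fpu;
	int err;

	/* Restore registers from the buffer referenced by the fpu: */
	copy_kernel_to_fpregs(fpu);

	/* Restore selected xstates, returning an error code on failure: */
	err = copy_kernel_to_xregs_err(fpu, user_xfeatures | xfeatures_mask_supervisor());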

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v2:
* Updated the changelog with task->fpu removed. (Boris Petkov)
---
 arch/x86/include/asm/fpu/internal.h | 9 ++++++---
 arch/x86/kernel/fpu/core.c          | 4 ++--
 arch/x86/kernel/fpu/signal.c        | 3 +--
 arch/x86/kvm/x86.c                  | 2 +-
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0153c4d4ca77..37ea5e37f21c 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -397,8 +397,9 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
  * Restore xstate from kernel space xsave area, return an error code instead of
  * an exception.
  */
-static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
+static inline int copy_kernel_to_xregs_err(struct fpu *fpu, u64 mask)
 {
+	struct xregs_state *xstate = &fpu->state.xsave;
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
 	int err;
@@ -425,8 +426,10 @@ static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask
 	}
 }
 
-static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
+static inline void copy_kernel_to_fpregs(struct fpu *fpu)
 {
+	union fpregs_state *fpstate = &fpu->state;
+
 	/*
 	 * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
 	 * pending. Clear the x87 state here by setting it to fixed values.
@@ -511,7 +514,7 @@ static inline void __fpregs_load_activate(void)
 		return;
 
 	if (!fpregs_state_valid(fpu, cpu)) {
-		copy_kernel_to_fpregs(&fpu->state);
+		copy_kernel_to_fpregs(fpu);
 		fpregs_activate(fpu);
 		fpu->last_cpu = cpu;
 	}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index f23e5ffbb307..20925cae2a84 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -172,7 +172,7 @@ void fpu__save(struct fpu *fpu)
 
 	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
-			copy_kernel_to_fpregs(&fpu->state);
+			copy_kernel_to_fpregs(fpu);
 		}
 	}
 
@@ -248,7 +248,7 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src)
 		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
 
 	else if (!copy_fpregs_to_fpstate(dst_fpu))
-		copy_kernel_to_fpregs(&dst_fpu->state);
+		copy_kernel_to_fpregs(dst_fpu);
 
 	fpregs_unlock();
 
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 0d6deb75c507..414a13427934 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -426,8 +426,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 * Restore previously saved supervisor xstates along with
 		 * copied-in user xstates.
 		 */
-		ret = copy_kernel_to_xregs_err(&fpu->state.xsave,
-					       user_xfeatures | xfeatures_mask_supervisor());
+		ret = copy_kernel_to_xregs_err(fpu, user_xfeatures | xfeatures_mask_supervisor());
 
 	} else if (use_fxsr()) {
 		ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 09368201d9cc..a087bbf252b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9249,7 +9249,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
 	kvm_save_current_fpu(vcpu->arch.guest_fpu);
 
-	copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
+	copy_kernel_to_fpregs(vcpu->arch.user_fpu);
 
 	fpregs_mark_activate();
 	fpregs_unlock();
-- 
2.17.1



* [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes
       [not found] <20201223155717.19556-1-chang.seok.bae@intel.com>
                   ` (2 preceding siblings ...)
  2020-12-23 15:57 ` [PATCH v3 04/21] x86/fpu/xstate: Modify context switch helpers " Chang S. Bae
@ 2020-12-23 15:57 ` Chang S. Bae
  2021-01-22 11:44   ` Borislav Petkov
  2020-12-23 15:57 ` [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate Chang S. Bae
  4 siblings, 1 reply; 20+ messages in thread
From: Chang S. Bae @ 2020-12-23 15:57 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, dave.hansen, jing2.liu, ravi.v.shankar, linux-kernel,
	chang.seok.bae, kvm

The xstate buffer is currently in-line with static size. To accommodate
dynamic user xstates, introduce variables to represent the maximum and
minimum buffer sizes.

do_extra_xstate_size_checks() calculates the maximum xstate size and sanity
checks it with CPUID. It calculates the static in-line buffer size by
excluding the dynamic user states from the maximum xstate size.

No functional change, until the kernel enables dynamic buffer support.
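
For illustration, a simplified sketch of the size accounting (alignment
and the non-compacted path are omitted; min_size/max_size stand for
fpu_kernel_xstate_min_size/fpu_kernel_xstate_max_size):

	int i;

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (!xfeature_enabled(i))
			continue;
		/* The maximum size covers every enabled state component: */
		max_size += xfeature_size(i);
		/* The minimum (in-line) size excludes dynamic user states: */
		if (!(xfeatures_mask_user_dynamic & BIT_ULL(i)))
			min_size += xfeature_size(i);
	}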

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v2:
* Updated the changelog with task->fpu removed. (Boris Petkov)
* Renamed the in-line size variable.
* Updated some code comments.
---
 arch/x86/include/asm/processor.h | 10 +++----
 arch/x86/kernel/fpu/core.c       |  6 ++---
 arch/x86/kernel/fpu/init.c       | 36 ++++++++++++++++---------
 arch/x86/kernel/fpu/signal.c     |  2 +-
 arch/x86/kernel/fpu/xstate.c     | 46 +++++++++++++++++++++-----------
 arch/x86/kernel/process.c        |  6 +++++
 arch/x86/kvm/x86.c               |  2 +-
 7 files changed, 67 insertions(+), 41 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 82a08b585818..c9c608f8af91 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -477,7 +477,8 @@ DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
 #endif	/* X86_64 */
 
-extern unsigned int fpu_kernel_xstate_size;
+extern unsigned int fpu_kernel_xstate_min_size;
+extern unsigned int fpu_kernel_xstate_max_size;
 extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
@@ -545,12 +546,7 @@ struct thread_struct {
 };
 
 /* Whitelist the FPU state from the task_struct for hardened usercopy. */
-static inline void arch_thread_struct_whitelist(unsigned long *offset,
-						unsigned long *size)
-{
-	*offset = offsetof(struct thread_struct, fpu.state);
-	*size = fpu_kernel_xstate_size;
-}
+extern void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
 
 /*
  * Thread-synchronous status.
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 20925cae2a84..1a428803e6b2 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -206,7 +206,7 @@ void fpstate_init(struct fpu *fpu)
 		return;
 	}
 
-	memset(state, 0, fpu_kernel_xstate_size);
+	memset(state, 0, fpu_kernel_xstate_min_size);
 
 	if (static_cpu_has(X86_FEATURE_XSAVES))
 		fpstate_init_xstate(&state->xsave, xfeatures_mask_all);
@@ -233,7 +233,7 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src)
 	 * Don't let 'init optimized' areas of the XSAVE area
 	 * leak into the child task:
 	 */
-	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_min_size);
 
 	/*
 	 * If the FPU registers are not current just memcpy() the state.
@@ -245,7 +245,7 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src)
 	 */
 	fpregs_lock();
 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_min_size);
 
 	else if (!copy_fpregs_to_fpstate(dst_fpu))
 		copy_kernel_to_fpregs(dst_fpu);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 74e03e3bc20f..5dac97158030 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -130,13 +130,20 @@ static void __init fpu__init_system_generic(void)
 }
 
 /*
- * Size of the FPU context state. All tasks in the system use the
- * same context size, regardless of what portion they use.
- * This is inherent to the XSAVE architecture which puts all state
- * components into a single, continuous memory block:
+ * Size of the minimally allocated FPU context state. All threads have this amount
+ * of xstate buffer at minimum.
+ *
+ * This buffer is inherent to the XSAVE architecture which puts all state components
+ * into a single, continuous memory block:
+ */
+unsigned int fpu_kernel_xstate_min_size;
+EXPORT_SYMBOL_GPL(fpu_kernel_xstate_min_size);
+
+/*
+ * Size of the maximum FPU context state. When using the compacted format, the buffer
+ * can be dynamically expanded to include some states up to this size.
  */
-unsigned int fpu_kernel_xstate_size;
-EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
+unsigned int fpu_kernel_xstate_max_size;
 
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
@@ -167,8 +174,10 @@ static void __init fpu__init_task_struct_size(void)
 	/*
 	 * Add back the dynamically-calculated register state
 	 * size.
+	 *
+	 * Use the minimum size as in-lined to the task_struct.
 	 */
-	task_size += fpu_kernel_xstate_size;
+	task_size += fpu_kernel_xstate_min_size;
 
 	/*
 	 * We dynamically size 'struct fpu', so we require that
@@ -193,6 +202,7 @@ static void __init fpu__init_task_struct_size(void)
 static void __init fpu__init_system_xstate_size_legacy(void)
 {
 	static int on_boot_cpu __initdata = 1;
+	unsigned int size;
 
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
@@ -203,17 +213,17 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	 */
 
 	if (!boot_cpu_has(X86_FEATURE_FPU)) {
-		fpu_kernel_xstate_size = sizeof(struct swregs_state);
+		size = sizeof(struct swregs_state);
 	} else {
 		if (boot_cpu_has(X86_FEATURE_FXSR))
-			fpu_kernel_xstate_size =
-				sizeof(struct fxregs_state);
+			size = sizeof(struct fxregs_state);
 		else
-			fpu_kernel_xstate_size =
-				sizeof(struct fregs_state);
+			size = sizeof(struct fregs_state);
 	}
 
-	fpu_user_xstate_size = fpu_kernel_xstate_size;
+	fpu_kernel_xstate_min_size = size;
+	fpu_kernel_xstate_max_size = size;
+	fpu_user_xstate_size = size;
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 414a13427934..b6d2706b6886 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -289,8 +289,8 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
 
 static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 {
+	int state_size = fpu_kernel_xstate_min_size;
 	struct user_i387_ia32_struct *envp = NULL;
-	int state_size = fpu_kernel_xstate_size;
 	int ia32_fxstate = (buf != buf_fx);
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6620d0a3caff..2012b17b1793 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
  */
 static void do_extra_xstate_size_checks(void)
 {
-	int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+	int paranoid_min_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+	int paranoid_max_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
 	int i;
 
 	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+		bool dynamic;
+
 		if (!xfeature_enabled(i))
 			continue;
 
+		dynamic = (xfeatures_mask_user_dynamic & BIT_ULL(i)) ? true : false;
+
 		check_xstate_against_struct(i);
 		/*
 		 * Supervisor state components can be managed only by
@@ -643,23 +648,32 @@ static void do_extra_xstate_size_checks(void)
 			XSTATE_WARN_ON(xfeature_is_supervisor(i));
 
 		/* Align from the end of the previous feature */
-		if (xfeature_is_aligned(i))
-			paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64);
+		if (xfeature_is_aligned(i)) {
+			paranoid_max_size = ALIGN(paranoid_max_size, 64);
+			if (!dynamic)
+				paranoid_min_size = ALIGN(paranoid_min_size, 64);
+		}
 		/*
 		 * The offset of a given state in the non-compacted
 		 * format is given to us in a CPUID leaf.  We check
 		 * them for being ordered (increasing offsets) in
 		 * setup_xstate_features().
 		 */
-		if (!using_compacted_format())
-			paranoid_xstate_size = xfeature_uncompacted_offset(i);
+		if (!using_compacted_format()) {
+			paranoid_max_size = xfeature_uncompacted_offset(i);
+			if (!dynamic)
+				paranoid_min_size = xfeature_uncompacted_offset(i);
+		}
 		/*
 		 * The compacted-format offset always depends on where
 		 * the previous state ended.
 		 */
-		paranoid_xstate_size += xfeature_size(i);
+		paranoid_max_size += xfeature_size(i);
+		if (!dynamic)
+			paranoid_min_size += xfeature_size(i);
 	}
-	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
+	XSTATE_WARN_ON(paranoid_max_size != fpu_kernel_xstate_max_size);
+	fpu_kernel_xstate_min_size = paranoid_min_size;
 }
 
 
@@ -744,27 +758,27 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
 static int __init init_xstate_size(void)
 {
 	/* Recompute the context size for enabled features: */
-	unsigned int possible_xstate_size;
+	unsigned int possible_max_xstate_size;
 	unsigned int xsave_size;
 
 	xsave_size = get_xsave_size();
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		possible_xstate_size = get_xsaves_size_no_dynamic();
+		possible_max_xstate_size = get_xsaves_size_no_dynamic();
 	else
-		possible_xstate_size = xsave_size;
-
-	/* Ensure we have the space to store all enabled: */
-	if (!is_supported_xstate_size(possible_xstate_size))
-		return -EINVAL;
+		possible_max_xstate_size = xsave_size;
 
 	/*
 	 * The size is OK, we are definitely going to use xsave,
 	 * make it known to the world that we need more space.
 	 */
-	fpu_kernel_xstate_size = possible_xstate_size;
+	fpu_kernel_xstate_max_size = possible_max_xstate_size;
 	do_extra_xstate_size_checks();
 
+	/* Ensure we have the supported in-line space: */
+	if (!is_supported_xstate_size(fpu_kernel_xstate_min_size))
+		return -EINVAL;
+
 	/*
 	 * User space is always in standard format.
 	 */
@@ -869,7 +883,7 @@ void __init fpu__init_system_xstate(void)
 
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask_all,
-		fpu_kernel_xstate_size,
+		fpu_kernel_xstate_max_size,
 		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 	return;
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 145a7ac0c19a..326b16aefb06 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -96,6 +96,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return fpu__copy(dst, src);
 }
 
+void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
+{
+	*offset = offsetof(struct thread_struct, fpu.state);
+	*size = fpu_kernel_xstate_min_size;
+}
+
 /*
  * Free thread data structures etc..
  */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a087bbf252b6..4aecfba04bd3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9220,7 +9220,7 @@ static void kvm_save_current_fpu(struct fpu *fpu)
 	 */
 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
 		memcpy(&fpu->state, &current->thread.fpu.state,
-		       fpu_kernel_xstate_size);
+		       fpu_kernel_xstate_min_size);
 	else
 		copy_fpregs_to_fpstate(fpu);
 }
-- 
2.17.1



* [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate
       [not found] <20201223155717.19556-1-chang.seok.bae@intel.com>
                   ` (3 preceding siblings ...)
  2020-12-23 15:57 ` [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes Chang S. Bae
@ 2020-12-23 15:57 ` Chang S. Bae
  2021-01-07  8:41   ` Liu, Jing2
  2021-02-08 12:33   ` Borislav Petkov
  4 siblings, 2 replies; 20+ messages in thread
From: Chang S. Bae @ 2020-12-23 15:57 UTC (permalink / raw)
  To: bp, luto, tglx, mingo, x86
  Cc: len.brown, dave.hansen, jing2.liu, ravi.v.shankar, linux-kernel,
	chang.seok.bae, kvm

copy_xregs_to_kernel() used to save all user states in a kernel buffer.
When the dynamic user state is enabled, it becomes conditional which state
to be saved.

fpu->state_mask can indicate which state components are reserved to be
saved in XSAVE buffer. Use it as XSAVE's instruction mask to select states.

KVM used to save all xstate via copy_xregs_to_kernel(). Update KVM to set a
valid fpu->state_mask, which will be necessary to correctly handle dynamic
state buffers.

No functional change until the kernel supports dynamic user states.
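
For illustration, a minimal sketch of the save path with the new mask
argument (not part of the diff below):

	struct fpu *fpu = &current->thread.fpu;

	/* Save only the state components reserved in this task's buffer: */
	copy_xregs_to_kernel(&fpu->state.xsave, fpu->state_mask);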

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v2:
* Updated the changelog to clarify the KVM code changes.
---
 arch/x86/include/asm/fpu/internal.h |  3 +--
 arch/x86/kernel/fpu/core.c          |  2 +-
 arch/x86/kvm/x86.c                  | 11 ++++++++---
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 67ffd1d7c95e..d409a6ae0c38 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -332,9 +332,8 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 /*
  * Save processor xstate to xsave area.
  */
-static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
+static inline void copy_xregs_to_kernel(struct xregs_state *xstate, u64 mask)
 {
-	u64 mask = xfeatures_mask_all;
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
 	int err;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8b9d3ec9ac46..5a12e4b22db2 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -99,7 +99,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu)
 	if (likely(use_xsave())) {
 		struct xregs_state *xsave = &xstate->xsave;
 
-		copy_xregs_to_kernel(xsave);
+		copy_xregs_to_kernel(xsave, fpu->state_mask);
 
 		/*
 		 * AVX512 state is tracked here because its use is
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4aecfba04bd3..93b5bacad67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9214,15 +9214,20 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 static void kvm_save_current_fpu(struct fpu *fpu)
 {
+	struct fpu *src_fpu = &current->thread.fpu;
+
 	/*
 	 * If the target FPU state is not resident in the CPU registers, just
 	 * memcpy() from current, else save CPU state directly to the target.
 	 */
-	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		memcpy(&fpu->state, &current->thread.fpu.state,
+	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
+		memcpy(&fpu->state, &src_fpu->state,
 		       fpu_kernel_xstate_min_size);
-	else
+	} else {
+		if (fpu->state_mask != src_fpu->state_mask)
+			fpu->state_mask = src_fpu->state_mask;
 		copy_fpregs_to_fpstate(fpu);
+	}
 }
 
 /* Swap (qemu) user FPU context for the guest FPU context. */
-- 
2.17.1



* RE: [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate
  2020-12-23 15:57 ` [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate Chang S. Bae
@ 2021-01-07  8:41   ` Liu, Jing2
  2021-01-07 18:40     ` Bae, Chang Seok
  2021-02-08 12:33   ` Borislav Petkov
  1 sibling, 1 reply; 20+ messages in thread
From: Liu, Jing2 @ 2021-01-07  8:41 UTC (permalink / raw)
  To: Bae, Chang Seok, bp, luto, tglx, mingo, x86
  Cc: Brown, Len, Hansen, Dave, Shankar, Ravi V, linux-kernel, kvm



-----Original Message-----
From: Bae, Chang Seok <chang.seok.bae@intel.com> 
Sent: Wednesday, December 23, 2020 11:57 PM
To: bp@suse.de; luto@kernel.org; tglx@linutronix.de; mingo@kernel.org; x86@kernel.org
Cc: Brown, Len <len.brown@intel.com>; Hansen, Dave <dave.hansen@intel.com>; Liu, Jing2 <jing2.liu@intel.com>; Shankar, Ravi V <ravi.v.shankar@intel.com>; linux-kernel@vger.kernel.org; Bae, Chang Seok <chang.seok.bae@intel.com>; kvm@vger.kernel.org
Subject: [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate

copy_xregs_to_kernel() used to save all user states in a kernel buffer.
When the dynamic user state is enabled, it becomes conditional which state to be saved.

fpu->state_mask can indicate which state components are reserved to be
saved in XSAVE buffer. Use it as XSAVE's instruction mask to select states.

KVM used to save all xstate via copy_xregs_to_kernel(). Update KVM to set a valid fpu->state_mask, which will be necessary to correctly handle dynamic state buffers.

See my comments inline below.

No functional change until the kernel supports dynamic user states.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
[...]
 		/*
 		 * AVX512 state is tracked here because its use is
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4aecfba04bd3..93b5bacad67a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9214,15 +9214,20 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 static void kvm_save_current_fpu(struct fpu *fpu)  {
+	struct fpu *src_fpu = &current->thread.fpu;
+
 	/*
 	 * If the target FPU state is not resident in the CPU registers, just
 	 * memcpy() from current, else save CPU state directly to the target.
 	 */
-	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		memcpy(&fpu->state, &current->thread.fpu.state,
+	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
+		memcpy(&fpu->state, &src_fpu->state,
 		       fpu_kernel_xstate_min_size);
For kvm, if we assume that it does not support dynamic features until this series,
memcpy for only fpu->state is correct. 
I think this kind of assumption is reasonable and we only make original xstate work.

-	else
+	} else {
+		if (fpu->state_mask != src_fpu->state_mask)
+			fpu->state_mask = src_fpu->state_mask;

Though dynamic feature is not supported in kvm now, this function still need
consider more things for fpu->state_mask.
I suggest that we can set it before if...else (for both cases) and not change other. 

Thanks,
Jing

 		copy_fpregs_to_fpstate(fpu);
+	}

 }

 
 /* Swap (qemu) user FPU context for the guest FPU context. */
--
2.17.1
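
For reference, a minimal sketch of the restructuring suggested above
(set the mask before the if/else so both paths see it); this is
illustrative only, not a posted patch:

	static void kvm_save_current_fpu(struct fpu *fpu)
	{
		struct fpu *src_fpu = &current->thread.fpu;

		/* Keep the mask in sync for both paths: */
		fpu->state_mask = src_fpu->state_mask;

		if (test_thread_flag(TIF_NEED_FPU_LOAD))
			memcpy(&fpu->state, &src_fpu->state,
			       fpu_kernel_xstate_min_size);
		else
			copy_fpregs_to_fpstate(fpu);
	}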



* Re: [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate
  2021-01-07  8:41   ` Liu, Jing2
@ 2021-01-07 18:40     ` Bae, Chang Seok
  2021-01-12  2:52       ` Liu, Jing2
  0 siblings, 1 reply; 20+ messages in thread
From: Bae, Chang Seok @ 2021-01-07 18:40 UTC (permalink / raw)
  To: Liu, Jing2
  Cc: bp, luto, tglx, mingo, x86, Brown, Len, Hansen, Dave, Shankar,
	Ravi V, linux-kernel, kvm


> On Jan 7, 2021, at 17:41, Liu, Jing2 <jing2.liu@intel.com> wrote:
> 
> static void kvm_save_current_fpu(struct fpu *fpu)  {
> +	struct fpu *src_fpu = &current->thread.fpu;
> +
> 	/*
> 	 * If the target FPU state is not resident in the CPU registers, just
> 	 * memcpy() from current, else save CPU state directly to the target.
> 	 */
> -	if (test_thread_flag(TIF_NEED_FPU_LOAD))
> -		memcpy(&fpu->state, &current->thread.fpu.state,
> +	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
> +		memcpy(&fpu->state, &src_fpu->state,
> 		       fpu_kernel_xstate_min_size);
> For kvm, if we assume that it does not support dynamic features until this series,
> memcpy for only fpu->state is correct. 
> I think this kind of assumption is reasonable and we only make original xstate work.
> 
> -	else
> +	} else {
> +		if (fpu->state_mask != src_fpu->state_mask)
> +			fpu->state_mask = src_fpu->state_mask;
> 
> Though dynamic feature is not supported in kvm now, this function still need
> consider more things for fpu->state_mask.

Can you elaborate this? Which path might be affected by fpu->state_mask
without dynamic state supported in KVM?

> I suggest that we can set it before if...else (for both cases) and not change other. 

I tried a minimum change here.  The fpu->state_mask value does not impact the
memcpy(). So, why do we need to change it for both?

Thanks,
Chang


* Re: [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate
  2021-01-07 18:40     ` Bae, Chang Seok
@ 2021-01-12  2:52       ` Liu, Jing2
  2021-01-15  4:59         ` Bae, Chang Seok
  0 siblings, 1 reply; 20+ messages in thread
From: Liu, Jing2 @ 2021-01-12  2:52 UTC (permalink / raw)
  To: Bae, Chang Seok, Liu, Jing2
  Cc: bp, luto, tglx, mingo, x86, Brown, Len, Hansen, Dave, Shankar,
	Ravi V, linux-kernel, kvm


On 1/8/2021 2:40 AM, Bae, Chang Seok wrote:
>> On Jan 7, 2021, at 17:41, Liu, Jing2 <jing2.liu@intel.com> wrote:
>>
>> static void kvm_save_current_fpu(struct fpu *fpu)  {
>> +	struct fpu *src_fpu = &current->thread.fpu;
>> +
>> 	/*
>> 	 * If the target FPU state is not resident in the CPU registers, just
>> 	 * memcpy() from current, else save CPU state directly to the target.
>> 	 */
>> -	if (test_thread_flag(TIF_NEED_FPU_LOAD))
>> -		memcpy(&fpu->state, &current->thread.fpu.state,
>> +	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
>> +		memcpy(&fpu->state, &src_fpu->state,
>> 		       fpu_kernel_xstate_min_size);
>> For kvm, if we assume that it does not support dynamic features until this series,
>> memcpy for only fpu->state is correct.
>> I think this kind of assumption is reasonable and we only make original xstate work.
>>
>> -	else
>> +	} else {
>> +		if (fpu->state_mask != src_fpu->state_mask)
>> +			fpu->state_mask = src_fpu->state_mask;
>>
>> Though dynamic feature is not supported in kvm now, this function still need
>> consider more things for fpu->state_mask.
> Can you elaborate this? Which path might be affected by fpu->state_mask
> without dynamic state supported in KVM?
>
>> I suggest that we can set it before if...else (for both cases) and not change other.
> I tried a minimum change here.  The fpu->state_mask value does not impact the
> memcpy(). So, why do we need to change it for both?

Sure, what I'm considering is that "mask" is the first time introduced
into "fpu", representing the usage, so not only set it when needed, but
also make it as a representation, in case of anywhere using it
(especially between the interval of this series and kvm series in future).

Thanks,
Jing

> Thanks,
> Chang


* Re: [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate
  2021-01-12  2:52       ` Liu, Jing2
@ 2021-01-15  4:59         ` Bae, Chang Seok
  2021-01-15  5:45           ` Liu, Jing2
  0 siblings, 1 reply; 20+ messages in thread
From: Bae, Chang Seok @ 2021-01-15  4:59 UTC (permalink / raw)
  To: Liu, Jing2
  Cc: Liu, Jing2, bp, luto, tglx, mingo, x86, Brown, Len, Hansen, Dave,
	Shankar, Ravi V, linux-kernel, kvm


> On Jan 11, 2021, at 18:52, Liu, Jing2 <jing2.liu@linux.intel.com> wrote:
> 
> On 1/8/2021 2:40 AM, Bae, Chang Seok wrote:
>>> On Jan 7, 2021, at 17:41, Liu, Jing2 <jing2.liu@intel.com> wrote:
>>> 
>>> static void kvm_save_current_fpu(struct fpu *fpu)  {
>>> +	struct fpu *src_fpu = &current->thread.fpu;
>>> +
>>> 	/*
>>> 	 * If the target FPU state is not resident in the CPU registers, just
>>> 	 * memcpy() from current, else save CPU state directly to the target.
>>> 	 */
>>> -	if (test_thread_flag(TIF_NEED_FPU_LOAD))
>>> -		memcpy(&fpu->state, &current->thread.fpu.state,
>>> +	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
>>> +		memcpy(&fpu->state, &src_fpu->state,
>>> 		       fpu_kernel_xstate_min_size);

<snip>

>>> -	else
>>> +	} else {
>>> +		if (fpu->state_mask != src_fpu->state_mask)
>>> +			fpu->state_mask = src_fpu->state_mask;
>>> 
>>> Though dynamic feature is not supported in kvm now, this function still need
>>> consider more things for fpu->state_mask.
>> Can you elaborate this? Which path might be affected by fpu->state_mask
>> without dynamic state supported in KVM?
>> 
>>> I suggest that we can set it before if...else (for both cases) and not change other.
>> I tried a minimum change here.  The fpu->state_mask value does not impact the
>> memcpy(). So, why do we need to change it for both?
> 
> Sure, what I'm considering is that "mask" is the first time introduced into "fpu",
> representing the usage, so not only set it when needed, but also make it as a
> representation, in case of anywhere using it (especially between the interval
> of this series and kvm series in future).

Thank you for the feedback. Sorry, I don't get any logical reason to set the
mask always here. Perhaps, KVM code can be updated like you mentioned when
supporting the dynamic states there.

Please let me know if I’m missing any functional issues.

Thanks,
Chang


* Re: [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate
  2021-01-15  4:59         ` Bae, Chang Seok
@ 2021-01-15  5:45           ` Liu, Jing2
  0 siblings, 0 replies; 20+ messages in thread
From: Liu, Jing2 @ 2021-01-15  5:45 UTC (permalink / raw)
  To: Bae, Chang Seok
  Cc: Liu, Jing2, bp, luto, tglx, mingo, x86, Brown, Len, Hansen, Dave,
	Shankar, Ravi V, linux-kernel, kvm


On 1/15/2021 12:59 PM, Bae, Chang Seok wrote:
>> On Jan 11, 2021, at 18:52, Liu, Jing2 <jing2.liu@linux.intel.com> wrote:
>>
>> On 1/8/2021 2:40 AM, Bae, Chang Seok wrote:
>>>> On Jan 7, 2021, at 17:41, Liu, Jing2 <jing2.liu@intel.com> wrote:
>>>>
>>>> static void kvm_save_current_fpu(struct fpu *fpu)  {
>>>> +	struct fpu *src_fpu = &current->thread.fpu;
>>>> +
>>>> 	/*
>>>> 	 * If the target FPU state is not resident in the CPU registers, just
>>>> 	 * memcpy() from current, else save CPU state directly to the target.
>>>> 	 */
>>>> -	if (test_thread_flag(TIF_NEED_FPU_LOAD))
>>>> -		memcpy(&fpu->state, &current->thread.fpu.state,
>>>> +	if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
>>>> +		memcpy(&fpu->state, &src_fpu->state,
>>>> 		       fpu_kernel_xstate_min_size);
> <snip>
>
>>>> -	else
>>>> +	} else {
>>>> +		if (fpu->state_mask != src_fpu->state_mask)
>>>> +			fpu->state_mask = src_fpu->state_mask;
>>>>
>>>> Though dynamic feature is not supported in kvm now, this function still need
>>>> consider more things for fpu->state_mask.
>>> Can you elaborate this? Which path might be affected by fpu->state_mask
>>> without dynamic state supported in KVM?
>>>
>>>> I suggest that we can set it before if...else (for both cases) and not change other.
>>> I tried a minimum change here.  The fpu->state_mask value does not impact the
>>> memcpy(). So, why do we need to change it for both?
>> Sure, what I'm considering is that "mask" is the first time introduced into "fpu",
>> representing the usage, so not only set it when needed, but also make it as a
>> representation, in case of anywhere using it (especially between the interval
>> of this series and kvm series in future).
> Thank you for the feedback. Sorry, I don't get any logical reason to set the
> mask always here.

Sure, I'd like to see whether fx_init()->memset is such a case, though it
has caused no harm in testing so far.

Thanks,
Jing

>   Perhaps, KVM code can be updated like you mentioned when
> supporting the dynamic states there.
>
> Please let me know if I’m missing any functional issues.
>
> Thanks,
> Chang


* Re: [PATCH v3 01/21] x86/fpu/xstate: Modify initialization helper to handle both static and dynamic buffers
  2020-12-23 15:56 ` [PATCH v3 01/21] x86/fpu/xstate: Modify initialization helper to handle both static and dynamic buffers Chang S. Bae
@ 2021-01-15 12:40   ` Borislav Petkov
  0 siblings, 0 replies; 20+ messages in thread
From: Borislav Petkov @ 2021-01-15 12:40 UTC (permalink / raw)
  To: Chang S. Bae
  Cc: luto, tglx, mingo, x86, len.brown, dave.hansen, jing2.liu,
	ravi.v.shankar, linux-kernel, kvm

On Wed, Dec 23, 2020 at 07:56:57AM -0800, Chang S. Bae wrote:
> In preparation for dynamic xstate buffer expansion, update the buffer
> initialization function parameters to equally handle static in-line xstate
> buffer, as well as dynamically allocated xstate buffer.
> 
> init_fpstate is a special case, which is indicated by a null pointer
> parameter to fpstate_init().
> 
> Also, fpstate_init_xstate() now accepts the state component bitmap to
> configure XCOMP_BV for the compacted format.
> 
> No functional change.

Much better, thanks!

> diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
> index eb86a2b831b1..f23e5ffbb307 100644
> --- a/arch/x86/kernel/fpu/core.c
> +++ b/arch/x86/kernel/fpu/core.c
> @@ -191,8 +191,16 @@ static inline void fpstate_init_fstate(struct fregs_state *fp)
>  	fp->fos = 0xffff0000u;
>  }
>  
> -void fpstate_init(union fpregs_state *state)
> +/* A null pointer parameter indicates init_fpstate. */

Use kernel-doc comment style instead:

/**
 * ..
 *
 * @fpu: If NULL, use init_fpstate
 */

> +void fpstate_init(struct fpu *fpu)
>  {

...

-- 
Regards/Gruss,
    Boris.

SUSE Software Solutions Germany GmbH, GF: Felix Imendörffer, HRB 36809, AG Nürnberg


* Re: [PATCH v3 03/21] x86/fpu/xstate: Modify address finders to handle both static and dynamic buffers
  2020-12-23 15:56 ` [PATCH v3 03/21] x86/fpu/xstate: Modify address finders " Chang S. Bae
@ 2021-01-15 13:06   ` Borislav Petkov
  0 siblings, 0 replies; 20+ messages in thread
From: Borislav Petkov @ 2021-01-15 13:06 UTC (permalink / raw)
  To: Chang S. Bae
  Cc: luto, tglx, mingo, x86, len.brown, dave.hansen, jing2.liu,
	ravi.v.shankar, linux-kernel, kvm

On Wed, Dec 23, 2020 at 07:56:59AM -0800, Chang S. Bae wrote:
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index 6156dad0feb6..2010c31d25e1 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -894,15 +894,24 @@ void fpu__resume_cpu(void)
>   * Given an xstate feature nr, calculate where in the xsave
>   * buffer the state is.  Callers should ensure that the buffer
>   * is valid.
> + *
> + * A null pointer parameter indicates to use init_fpstate.
>   */

kernel-doc style comment pls.

-- 
Regards/Gruss,
    Boris.

SUSE Software Solutions Germany GmbH, GF: Felix Imendörffer, HRB 36809, AG Nürnberg


* Re: [PATCH v3 04/21] x86/fpu/xstate: Modify context switch helpers to handle both static and dynamic buffers
  2020-12-23 15:57 ` [PATCH v3 04/21] x86/fpu/xstate: Modify context switch helpers " Chang S. Bae
@ 2021-01-15 13:18   ` Borislav Petkov
  2021-01-19 18:49     ` Bae, Chang Seok
  0 siblings, 1 reply; 20+ messages in thread
From: Borislav Petkov @ 2021-01-15 13:18 UTC (permalink / raw)
  To: Chang S. Bae
  Cc: luto, tglx, mingo, x86, len.brown, dave.hansen, jing2.liu,
	ravi.v.shankar, linux-kernel, kvm

On Wed, Dec 23, 2020 at 07:57:00AM -0800, Chang S. Bae wrote:
> In preparation for dynamic xstate buffer expansion, update the xstate
> restore function parameters to equally handle static in-line xstate buffer,
> as well as dynamically allocated xstate buffer.

Ok, I see what you've done: you've slightly changed that same
formulation depending on what the patch is doing. I need to read very
carefully.

What I would've written is:

"Have all functions handling FPU state take a struct fpu * pointer in
preparation for dynamic state buffer support."

Plain and simple.

-- 
Regards/Gruss,
    Boris.

SUSE Software Solutions Germany GmbH, GF: Felix Imendörffer, HRB 36809, AG Nürnberg


* Re: [PATCH v3 04/21] x86/fpu/xstate: Modify context switch helpers to handle both static and dynamic buffers
  2021-01-15 13:18   ` Borislav Petkov
@ 2021-01-19 18:49     ` Bae, Chang Seok
  0 siblings, 0 replies; 20+ messages in thread
From: Bae, Chang Seok @ 2021-01-19 18:49 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Thomas Gleixner, Ingo Molnar, x86-ml, Brown,
	Len, Hansen, Dave, Liu, Jing2, Shankar, Ravi V, linux-kernel,
	kvm

On Jan 15, 2021, at 05:18, Borislav Petkov <bp@suse.de> wrote:
> 
> What I would've written is:
> 
> "Have all functions handling FPU state take a struct fpu * pointer in
> preparation for dynamic state buffer support."
> 
> Plain and simple.

Thank you. I will apply this on my next revision.

Chang


* Re: [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes
  2020-12-23 15:57 ` [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes Chang S. Bae
@ 2021-01-22 11:44   ` Borislav Petkov
  2021-01-27  1:23     ` Bae, Chang Seok
  0 siblings, 1 reply; 20+ messages in thread
From: Borislav Petkov @ 2021-01-22 11:44 UTC (permalink / raw)
  To: Chang S. Bae
  Cc: luto, tglx, mingo, x86, len.brown, dave.hansen, jing2.liu,
	ravi.v.shankar, linux-kernel, kvm

On Wed, Dec 23, 2020 at 07:57:02AM -0800, Chang S. Bae wrote:
> The xstate buffer is currently in-line with static size. To accommodate

"in-line" doesn't fit in this context, especially since "inline"
is a keyword with another meaning. Please replace it with a better
formulation in this patch.

> dynamic user xstates, introduce variables to represent the maximum and
> minimum buffer sizes.
> 
> do_extra_xstate_size_checks() calculates the maximum xstate size and sanity
> checks it with CPUID. It calculates the static in-line buffer size by
> excluding the dynamic user states from the maximum xstate size.
> 
> No functional change, until the kernel enables dynamic buffer support.
> 
> Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
> Reviewed-by: Len Brown <len.brown@intel.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: kvm@vger.kernel.org
> ---
> Changes from v2:
> * Updated the changelog with task->fpu removed. (Boris Petkov)
> * Renamed the in-line size variable.
> * Updated some code comments.
> ---
>  arch/x86/include/asm/processor.h | 10 +++----
>  arch/x86/kernel/fpu/core.c       |  6 ++---
>  arch/x86/kernel/fpu/init.c       | 36 ++++++++++++++++---------
>  arch/x86/kernel/fpu/signal.c     |  2 +-
>  arch/x86/kernel/fpu/xstate.c     | 46 +++++++++++++++++++++-----------
>  arch/x86/kernel/process.c        |  6 +++++
>  arch/x86/kvm/x86.c               |  2 +-
>  7 files changed, 67 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 82a08b585818..c9c608f8af91 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -477,7 +477,8 @@ DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
>  DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
>  #endif	/* X86_64 */
>  
> -extern unsigned int fpu_kernel_xstate_size;
> +extern unsigned int fpu_kernel_xstate_min_size;
> +extern unsigned int fpu_kernel_xstate_max_size;

Is it time to group this into a struct so that all those settings go
together instead of in single variables?

struct fpu_xstate {
	unsigned int min_size, max_size;
	unsigned int user_size;
	...
};

etc.

>  extern unsigned int fpu_user_xstate_size;
>  
>  struct perf_event;
> @@ -545,12 +546,7 @@ struct thread_struct {
>  };
>  
>  /* Whitelist the FPU state from the task_struct for hardened usercopy. */
> -static inline void arch_thread_struct_whitelist(unsigned long *offset,
> -						unsigned long *size)
> -{
> -	*offset = offsetof(struct thread_struct, fpu.state);
> -	*size = fpu_kernel_xstate_size;
> -}
> +extern void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size);

What's that move for?

> diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
> index 74e03e3bc20f..5dac97158030 100644
> --- a/arch/x86/kernel/fpu/init.c
> +++ b/arch/x86/kernel/fpu/init.c
> @@ -130,13 +130,20 @@ static void __init fpu__init_system_generic(void)
>  }
>  
>  /*
> - * Size of the FPU context state. All tasks in the system use the
> - * same context size, regardless of what portion they use.
> - * This is inherent to the XSAVE architecture which puts all state
> - * components into a single, continuous memory block:
> + * Size of the minimally allocated FPU context state. All threads have this amount
> + * of xstate buffer at minimum.
> + *
> + * This buffer is inherent to the XSAVE architecture which puts all state components
> + * into a single, continuous memory block:
> + */
> +unsigned int fpu_kernel_xstate_min_size;
> +EXPORT_SYMBOL_GPL(fpu_kernel_xstate_min_size);
> +
> +/*
> + * Size of the maximum FPU context state. When using the compacted format, the buffer
> + * can be dynamically expanded to include some states up to this size.
>   */
> -unsigned int fpu_kernel_xstate_size;
> -EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
> +unsigned int fpu_kernel_xstate_max_size;

And since we're probably going to start querying different aspects about
the buffer, instead of exporting all kinds of variables in the future,
maybe this should be a single exported function called

get_xstate_buffer_attr(typedef buffer_attr)

which gives you what you wanna know about it... For example:

get_xstate_buffer_attr(MIN_SIZE);
get_xstate_buffer_attr(MAX_SIZE);
...
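
A minimal sketch of such an accessor, using the size variables from this patch
(the enum and its values are made up for illustration):

	enum xstate_buffer_attr {
		XSTATE_MIN_SIZE,
		XSTATE_MAX_SIZE,
		XSTATE_USER_SIZE,
	};

	/* One exported query helper instead of exporting each size variable: */
	unsigned int get_xstate_buffer_attr(enum xstate_buffer_attr attr)
	{
		switch (attr) {
		case XSTATE_MIN_SIZE:
			return fpu_kernel_xstate_min_size;
		case XSTATE_MAX_SIZE:
			return fpu_kernel_xstate_max_size;
		case XSTATE_USER_SIZE:
			return fpu_user_xstate_size;
		default:
			WARN_ON_ONCE(1);
			return 0;
		}
	}
	EXPORT_SYMBOL_GPL(get_xstate_buffer_attr);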

> diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
> index 414a13427934..b6d2706b6886 100644
> --- a/arch/x86/kernel/fpu/signal.c
> +++ b/arch/x86/kernel/fpu/signal.c
> @@ -289,8 +289,8 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
>  
>  static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
>  {
> +	int state_size = fpu_kernel_xstate_min_size;
>  	struct user_i387_ia32_struct *envp = NULL;
> -	int state_size = fpu_kernel_xstate_size;
>  	int ia32_fxstate = (buf != buf_fx);
>  	struct task_struct *tsk = current;
>  	struct fpu *fpu = &tsk->thread.fpu;
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index 6620d0a3caff..2012b17b1793 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
>   */

<-- There's a comment over this function that might need adjustment.

>  static void do_extra_xstate_size_checks(void)
>  {
> -	int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
> +	int paranoid_min_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
> +	int paranoid_max_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
>  	int i;

...

> @@ -744,27 +758,27 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
>  static int __init init_xstate_size(void)
>  {
>  	/* Recompute the context size for enabled features: */
> -	unsigned int possible_xstate_size;
> +	unsigned int possible_max_xstate_size;
>  	unsigned int xsave_size;
>  
>  	xsave_size = get_xsave_size();
>  
>  	if (boot_cpu_has(X86_FEATURE_XSAVES))

using_compacted_format()

FPU code needs to agree on one helper and not use both. :-\

> -		possible_xstate_size = get_xsaves_size_no_dynamic();
> +		possible_max_xstate_size = get_xsaves_size_no_dynamic();
>  	else
> -		possible_xstate_size = xsave_size;
> -
> -	/* Ensure we have the space to store all enabled: */
> -	if (!is_supported_xstate_size(possible_xstate_size))
> -		return -EINVAL;
> +		possible_max_xstate_size = xsave_size;
>  
>  	/*
>  	 * The size is OK, we are definitely going to use xsave,
>  	 * make it known to the world that we need more space.
>  	 */
> -	fpu_kernel_xstate_size = possible_xstate_size;
> +	fpu_kernel_xstate_max_size = possible_max_xstate_size;
>  	do_extra_xstate_size_checks();
>  
> +	/* Ensure we have the supported in-line space: */

Who's "we"?

> +	if (!is_supported_xstate_size(fpu_kernel_xstate_min_size))
> +		return -EINVAL;
> +
>  	/*
>  	 * User space is always in standard format.
>  	 */

-- 
Regards/Gruss,
    Boris.

SUSE Software Solutions Germany GmbH, GF: Felix Imendörffer, HRB 36809, AG Nürnberg

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes
  2021-01-22 11:44   ` Borislav Petkov
@ 2021-01-27  1:23     ` Bae, Chang Seok
  2021-01-27  9:38       ` Borislav Petkov
  0 siblings, 1 reply; 20+ messages in thread
From: Bae, Chang Seok @ 2021-01-27  1:23 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Thomas Gleixner, mingo, x86, Brown, Len, Hansen,
	Dave, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm

On Jan 22, 2021, at 03:44, Borislav Petkov <bp@suse.de> wrote:
> On Wed, Dec 23, 2020 at 07:57:02AM -0800, Chang S. Bae wrote:
>> The xstate buffer is currently in-line with static size. To accommodate
> 
> "in-line" doesn't fit in this context, especially since "inline"
> is a keyword with another meaning. Please replace it with a better
> formulation in this patch.

How about ‘embedded’?
    “The xstate buffer is currently embedded into struct fpu with static size.”

>> -extern unsigned int fpu_kernel_xstate_size;
>> +extern unsigned int fpu_kernel_xstate_min_size;
>> +extern unsigned int fpu_kernel_xstate_max_size;
> 
> Is it time to group this into a struct so that all those settings go
> together instead of in single variables?
> 
> struct fpu_xstate {
> 	unsigned int min_size, max_size;
> 	unsigned int user_size;
> 	...
> };
> 
> etc.

<snip>

> And since we're probably going to start querying different aspects about
> the buffer, instead of exporting all kinds of variables in the future,
> maybe this should be a single exported function called
> 
> get_xstate_buffer_attr(typedef buffer_attr)
> 
> which gives you what you wanna know about it... For example:
> 
> get_xstate_buffer_attr(MIN_SIZE);
> get_xstate_buffer_attr(MAX_SIZE);
> ...

Okay. I will prepare a separate cleanup patch that can be applied at the end
of the series. Will post the change in this thread at first.

>> /* Whitelist the FPU state from the task_struct for hardened usercopy. */
>> -static inline void arch_thread_struct_whitelist(unsigned long *offset,
>> -						unsigned long *size)
>> -{
>> -	*offset = offsetof(struct thread_struct, fpu.state);
>> -	*size = fpu_kernel_xstate_size;
>> -}
>> +extern void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
> 
> What's that move for?

One of my drafts had an internal helper that was called in there. There is no
reason for the move before the get_xstate_buffer_attr() helper is applied, but
with that helper I think it is better to move this out of the header file.

>> @@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
>>  */
> 
> <-- There's a comment over this function that might need adjustment.

Do you mean an empty line? (Just want to clarify.)

>> static void do_extra_xstate_size_checks(void)
>> {

<snip>

>> 	if (boot_cpu_has(X86_FEATURE_XSAVES))
> 
> using_compacted_format()
> 
> FPU code needs to agree on one helper and not use both. :-\

Agreed. I will prepare a patch. At least will post the diff here.

<snip>

>> +	/* Ensure we have the supported in-line space: */
> 
> Who's "we"?

How about:
    “Ensure the size fits in the statically-allocated buffer:"

>> +	if (!is_supported_xstate_size(fpu_kernel_xstate_min_size))
>> +		return -EINVAL;

No excuse, just pointing out the upstream code has “we” there [1].

Thanks,
Chang

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kernel/fpu/xstate.c#n752


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes
  2021-01-27  1:23     ` Bae, Chang Seok
@ 2021-01-27  9:38       ` Borislav Petkov
  2021-02-03  2:54         ` Bae, Chang Seok
  0 siblings, 1 reply; 20+ messages in thread
From: Borislav Petkov @ 2021-01-27  9:38 UTC (permalink / raw)
  To: Bae, Chang Seok
  Cc: Andy Lutomirski, Thomas Gleixner, mingo, x86, Brown, Len, Hansen,
	Dave, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm

On Wed, Jan 27, 2021 at 01:23:35AM +0000, Bae, Chang Seok wrote:
> How about ‘embedded’?
>     “The xstate buffer is currently embedded into struct fpu with static size.”

Better.

> Okay. I will prepare a separate cleanup patch that can be applied at the end
> of the series. Will post the change in this thread at first.

No, this is not how this works. Imagine you pile up a patch at the end
for each review feedback you've gotten. No, this will be an insane churn
and an unreviewable mess.

What you do is you rework your patches like everyone else.

Also, thinking about this more, I'm wondering if all those
xstate-related attributes shouldn't be part of struct fpu instead of
being scattered around like that.

That thing - struct fpu * - gets passed in everywhere anyway so all that
min_size, max_size, ->xstate_ptr and whatever, looks like it wants to be
part of struct fpu. Then maybe you won't need the accessors...
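
For illustration only, that grouping would be along these lines (the field
names here are assumptions, not actual code):

	struct fpu {
		/* ... existing fields ... */

		/* Which state components the xstate buffer can hold: */
		u64			state_mask;

		/* Sizes of the minimal and fully expanded xstate buffer: */
		unsigned int		state_min_size;
		unsigned int		state_max_size;

		/* The (possibly dynamically allocated) xstate buffer: */
		union fpregs_state	*xstate_ptr;
	};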

> One of my drafts had an internal helper that was called in there. There is no
> reason for the move before the get_xstate_buffer_attr() helper is applied, but
> with that helper I think it is better to move this out of the header file.

See above.

> 
> >> @@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
> >>  */
> > 
> > <-- There's a comment over this function that might need adjustment.
> 
> Do you mean an empty line? (Just want to clarify.)

No, I mean this comment:

 * Dynamic XSAVE features allocate their own buffers and are not
 * covered by these checks. Only the size of the buffer for task->fpu
 * is checked here.

That probably needs adjusting as you do set min and max size here now
for the dynamic buffer.
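
One possible rewording, offered only as a sketch based on what this patch does:

 * Dynamic XSAVE features still allocate their own buffers. The minimum
 * (statically allocated) buffer size for task->fpu excludes them, while
 * the maximum size calculated here accounts for a fully expanded buffer.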

> Agreed. I will prepare a patch. At least will post the diff here.

You can send it separately from this patchset, ontop of current
tip/master, so that I can take it now.

> How about:
>     “Ensure the size fits in the statically-allocated buffer:"

Yep.

> No excuse, just pointing out the upstream code has “we” there [1].

Yeah, I know. :-\

But considering how many parties develop the kernel now, "we" becomes
really ambiguous.

Thx.

-- 
Regards/Gruss,
    Boris.

SUSE Software Solutions Germany GmbH, GF: Felix Imendörffer, HRB 36809, AG Nürnberg

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes
  2021-01-27  9:38       ` Borislav Petkov
@ 2021-02-03  2:54         ` Bae, Chang Seok
  0 siblings, 0 replies; 20+ messages in thread
From: Bae, Chang Seok @ 2021-02-03  2:54 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Thomas Gleixner, mingo, x86, Brown, Len, Hansen,
	Dave, Liu, Jing2, Shankar, Ravi V, linux-kernel, kvm

On Jan 27, 2021, at 01:38, Borislav Petkov <bp@suse.de> wrote:
> On Wed, Jan 27, 2021 at 01:23:35AM +0000, Bae, Chang Seok wrote:
>> Okay. I will prepare a separate cleanup patch that can be applied at the end
>> of the series. Will post the change in this thread at first.
> 
> No, this is not how this works. Imagine you pile up a patch at the end
> for each review feedback you've gotten. No, this will be an insane churn
> and an unreviewable mess.
> 
> What you do is you rework your patches like everyone else.

Yeah, it makes sense. I will post v4.

> Also, thinking about this more, I'm wondering if all those
> xstate-related attributes shouldn't be part of struct fpu instead of
> being scattered around like that.
> 
> That thing - struct fpu * - gets passed in everywhere anyway so all that
> min_size, max_size, ->xstate_ptr and whatever, looks like it wants to be
> part of struct fpu. Then maybe you won't need the accessors...

Well, min_size and max_size are not task-specific, so it would be wasteful to
include them in struct fpu.

I will follow your suggestion to add new helpers to access the size values,
instead of exporting them.

>>>> @@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
>>>> */
>>> 
>>> <-- There's a comment over this function that might need adjustment.
>> 
>> Do you mean an empty line? (Just want to clarify.)
> 
> No, I mean this comment:
> 
> * Dynamic XSAVE features allocate their own buffers and are not
> * covered by these checks. Only the size of the buffer for task->fpu
> * is checked here.
> 
> That probably needs adjusting as you do set min and max size here now
> for the dynamic buffer.

Oh, I see. Thank you.

>> Agreed. I will prepare a patch. At least will post the diff here.
> 
> You can send it separately from this patchset, ontop of current
> tip/master, so that I can take it now.

Posted: [1]. In the end, the proposal is to remove the helper.

Thanks,
Chang

[1] https://lore.kernel.org/lkml/20210203024052.15789-1-chang.seok.bae@intel.com/

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate
  2020-12-23 15:57 ` [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate Chang S. Bae
  2021-01-07  8:41   ` Liu, Jing2
@ 2021-02-08 12:33   ` Borislav Petkov
  2021-02-09 15:48     ` Bae, Chang Seok
  1 sibling, 1 reply; 20+ messages in thread
From: Borislav Petkov @ 2021-02-08 12:33 UTC (permalink / raw)
  To: Chang S. Bae
  Cc: luto, tglx, mingo, x86, len.brown, dave.hansen, jing2.liu,
	ravi.v.shankar, linux-kernel, kvm

On Wed, Dec 23, 2020 at 07:57:06AM -0800, Chang S. Bae wrote:
> copy_xregs_to_kernel() used to save all user states in a kernel buffer.
> When the dynamic user state is enabled, it becomes conditional which state
> to be saved.
> 
> fpu->state_mask can indicate which state components are reserved to be
> saved in XSAVE buffer. Use it as XSAVE's instruction mask to select states.
> 
> KVM used to save all xstate via copy_xregs_to_kernel(). Update KVM to set a
> valid fpu->state_mask, which will be necessary to correctly handle dynamic
> state buffers.

All this commit message should say is something along the lines of
"extend copy_xregs_to_kernel() to receive a mask argument of which
states to save, in preparation for dynamic states handling."
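
For the code side, the general shape being described is a mask-limited save,
roughly like the sketch below (in the patch itself, the mask comes from
fpu->state_mask rather than straight from the caller):

	/*
	 * Save the enabled xstate components into the kernel buffer,
	 * limited to the components set in @mask.
	 */
	static inline void copy_xregs_to_kernel(struct xregs_state *xstate, u64 mask)
	{
		u32 lmask = mask;
		u32 hmask = mask >> 32;
		int err;

		XSTATE_XSAVE(xstate, lmask, hmask, err);

		/* Saving to a kernel buffer should never fault: */
		WARN_ON_FPU(err);
	}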

> No functional change until the kernel supports dynamic user states.

Same comment as before.

-- 
Regards/Gruss,
    Boris.

SUSE Software Solutions Germany GmbH, GF: Felix Imendörffer, HRB 36809, AG Nürnberg

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate
  2021-02-08 12:33   ` Borislav Petkov
@ 2021-02-09 15:48     ` Bae, Chang Seok
  0 siblings, 0 replies; 20+ messages in thread
From: Bae, Chang Seok @ 2021-02-09 15:48 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, tglx, mingo, x86, Brown, Len, Hansen, Dave, Liu,
	Jing2, Shankar, Ravi V, linux-kernel, kvm

On Feb 8, 2021, at 04:33, Borislav Petkov <bp@suse.de> wrote:
> On Wed, Dec 23, 2020 at 07:57:06AM -0800, Chang S. Bae wrote:
>> copy_xregs_to_kernel() used to save all user states in a kernel buffer.
>> When the dynamic user state is enabled, it becomes conditional which state
>> to be saved.
>> 
>> fpu->state_mask can indicate which state components are reserved to be
>> saved in XSAVE buffer. Use it as XSAVE's instruction mask to select states.
>> 
>> KVM used to save all xstate via copy_xregs_to_kernel(). Update KVM to set a
>> valid fpu->state_mask, which will be necessary to correctly handle dynamic
>> state buffers.
> 
> All this commit message should say is something along the lines of
> "extend copy_xregs_to_kernel() to receive a mask argument of which
> states to save, in preparation for dynamic states handling."

Yes, I will change it like that. Thanks.

>> No functional change until the kernel supports dynamic user states.
> 
> Same comment as before.

This needs to be removed as per your comment [1].

Chang

[1] https://lore.kernel.org/lkml/20210209124906.GC15909@zn.tnic/

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-02-09 15:49 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20201223155717.19556-1-chang.seok.bae@intel.com>
2020-12-23 15:56 ` [PATCH v3 01/21] x86/fpu/xstate: Modify initialization helper to handle both static and dynamic buffers Chang S. Bae
2021-01-15 12:40   ` Borislav Petkov
2020-12-23 15:56 ` [PATCH v3 03/21] x86/fpu/xstate: Modify address finders " Chang S. Bae
2021-01-15 13:06   ` Borislav Petkov
2020-12-23 15:57 ` [PATCH v3 04/21] x86/fpu/xstate: Modify context switch helpers " Chang S. Bae
2021-01-15 13:18   ` Borislav Petkov
2021-01-19 18:49     ` Bae, Chang Seok
2020-12-23 15:57 ` [PATCH v3 06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes Chang S. Bae
2021-01-22 11:44   ` Borislav Petkov
2021-01-27  1:23     ` Bae, Chang Seok
2021-01-27  9:38       ` Borislav Petkov
2021-02-03  2:54         ` Bae, Chang Seok
2020-12-23 15:57 ` [PATCH v3 10/21] x86/fpu/xstate: Update xstate save function to support dynamic xstate Chang S. Bae
2021-01-07  8:41   ` Liu, Jing2
2021-01-07 18:40     ` Bae, Chang Seok
2021-01-12  2:52       ` Liu, Jing2
2021-01-15  4:59         ` Bae, Chang Seok
2021-01-15  5:45           ` Liu, Jing2
2021-02-08 12:33   ` Borislav Petkov
2021-02-09 15:48     ` Bae, Chang Seok
