linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] x86/fpu/state: Fix XSAVES issues - Part 1
@ 2016-05-20 17:47 Yu-cheng Yu
  2016-05-20 17:47 ` [PATCH 1/4] x86/fpu/xstate: Define and use fpu_user_xstate_size Yu-cheng Yu
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Yu-cheng Yu @ 2016-05-20 17:47 UTC (permalink / raw)
  To: linux-kernel, x86, H. Peter Anvin, Thomas Gleixner, Ingo Molnar
  Cc: Dave Hansen, Andy Lutomirski, Sai Praneeth Prakhya,
	Ravi V. Shankar, Fenghua Yu, Yu-cheng Yu

** Based on tip/master **

This is Part 1 of previous 13 XSAVES patches. Break it down to
smaller series. There are no code changes; only minor fixes in
the titles.

Fenghua Yu (3):
  x86/fpu/xstate: Define and use fpu_user_xstate_size
  x86/fpu/xstate: Rename xstate_size to fpu_kernel_xstate_size to
    distinguish from fpu_user_xstate_size
  x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for
    init optimization

Yu-cheng Yu (1):
  x86/fpu/xstate: Copy xstate registers directly to signal frame when
    compacted format is in use

 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/include/asm/processor.h  |  3 +-
 arch/x86/kernel/fpu/core.c        |  7 +--
 arch/x86/kernel/fpu/init.c        | 21 +++++----
 arch/x86/kernel/fpu/signal.c      | 32 ++++++++++----
 arch/x86/kernel/fpu/xstate.c      | 92 +++++++++++++++++++++++----------------
 6 files changed, 98 insertions(+), 59 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/4] x86/fpu/xstate: Define and use fpu_user_xstate_size
  2016-05-20 17:47 [PATCH 0/4] x86/fpu/state: Fix XSAVES issues - Part 1 Yu-cheng Yu
@ 2016-05-20 17:47 ` Yu-cheng Yu
  2016-06-17 12:20   ` [tip:x86/fpu] x86/fpu/xstate: Define and use 'fpu_user_xstate_size' tip-bot for Fenghua Yu
  2016-06-18  8:12   ` tip-bot for Fenghua Yu
  2016-05-20 17:47 ` [PATCH 2/4] x86/fpu/xstate: Rename xstate_size to fpu_kernel_xstate_size to distinguish from fpu_user_xstate_size Yu-cheng Yu
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 13+ messages in thread
From: Yu-cheng Yu @ 2016-05-20 17:47 UTC (permalink / raw)
  To: linux-kernel, x86, H. Peter Anvin, Thomas Gleixner, Ingo Molnar
  Cc: Dave Hansen, Andy Lutomirski, Sai Praneeth Prakhya,
	Ravi V. Shankar, Fenghua Yu, Yu-cheng Yu

From: Fenghua Yu <fenghua.yu@intel.com>

The kernel xstate area can be in standard or compacted format;
it is always in standard format for user mode. When XSAVES is
enabled, the kernel uses the compacted format and it is necessary
to use a separate fpu_user_xstate_size for signal/ptrace frames.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
[yu-cheng.yu@intel.com: rebase to current, rename to fpu_user_xstate_size]
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/fpu/xstate.h |  1 -
 arch/x86/include/asm/processor.h  |  1 +
 arch/x86/kernel/fpu/init.c        |  5 ++-
 arch/x86/kernel/fpu/signal.c      | 27 ++++++++++----
 arch/x86/kernel/fpu/xstate.c      | 76 ++++++++++++++++++++++++---------------
 5 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 38951b0..16df2c4 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -39,7 +39,6 @@
 #define REX_PREFIX
 #endif
 
-extern unsigned int xstate_size;
 extern u64 xfeatures_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 62c6cc3..0a16a16 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -368,6 +368,7 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 #endif	/* X86_64 */
 
 extern unsigned int xstate_size;
+extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index aacfd7a..5b1928c 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void)
 }
 
 /*
- * Set up the xstate_size based on the legacy FPU context size.
+ * Set up the user and kernel xstate_size based on the legacy FPU context size.
  *
  * We set this up first, and later it will be overwritten by
  * fpu__init_system_xstate() if the CPU knows about xstates.
@@ -226,6 +226,9 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 		else
 			xstate_size = sizeof(struct fregs_state);
 	}
+
+	fpu_user_xstate_size = xstate_size;
+
 	/*
 	 * Quirk: we don't yet handle the XSAVES* instructions
 	 * correctly, as we don't correctly convert between
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 31c6a60..23572a9 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -31,7 +31,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf,
 	/* Check for the first magic field and other error scenarios. */
 	if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
 	    fx_sw->xstate_size < min_xstate_size ||
-	    fx_sw->xstate_size > xstate_size ||
+	    fx_sw->xstate_size > fpu_user_xstate_size ||
 	    fx_sw->xstate_size > fx_sw->extended_size)
 		return -1;
 
@@ -88,7 +88,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 	if (!use_xsave())
 		return err;
 
-	err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+	err |= __put_user(FP_XSTATE_MAGIC2,
+			  (__u32 *)(buf + fpu_user_xstate_size));
 
 	/*
 	 * Read the xfeatures which we copied (directly from the cpu or
@@ -125,7 +126,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
 	else
 		err = copy_fregs_to_user((struct fregs_state __user *) buf);
 
-	if (unlikely(err) && __clear_user(buf, xstate_size))
+	if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size))
 		err = -EFAULT;
 	return err;
 }
@@ -175,8 +176,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 		if (ia32_fxstate)
 			copy_fxregs_to_kernel(&tsk->thread.fpu);
 	} else {
+		/*
+		 * It is a *bug* if kernel uses compacted-format for xsave
+		 * area and we copy it out directly to a signal frame. It
+		 * should have been handled above by saving the registers
+		 * directly.
+		 */
+		if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+			WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n");
+			return -1;
+		}
+
 		fpstate_sanitize_xstate(&tsk->thread.fpu);
-		if (__copy_to_user(buf_fx, xsave, xstate_size))
+		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
 			return -1;
 	}
 
@@ -341,7 +353,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 
 static inline int xstate_sigframe_size(void)
 {
-	return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+	return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
+			fpu_user_xstate_size;
 }
 
 /*
@@ -385,12 +398,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
  */
 void fpu__init_prepare_fx_sw_frame(void)
 {
-	int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+	int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE;
 
 	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
 	fx_sw_reserved.extended_size = size;
 	fx_sw_reserved.xfeatures = xfeatures_mask;
-	fx_sw_reserved.xstate_size = xstate_size;
+	fx_sw_reserved.xstate_size = fpu_user_xstate_size;
 
 	if (config_enabled(CONFIG_IA32_EMULATION) ||
 	    config_enabled(CONFIG_X86_32)) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 4ea2a59..9c4da35 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -44,6 +44,13 @@ static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] =
 static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
 
 /*
+ * The XSAVE area of kernel can be in standard or compacted format;
+ * it is always in standard format for user mode. This is the user
+ * mode standard format size used for signal and ptrace frames.
+ */
+unsigned int fpu_user_xstate_size;
+
+/*
  * Clear all of the X86_FEATURE_* bits that are unavailable
  * when the CPU has no XSAVE support.
  */
@@ -171,7 +178,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
 	 */
 	while (xfeatures) {
 		if (xfeatures & 0x1) {
-			int offset = xstate_offsets[feature_bit];
+			int offset = xstate_comp_offsets[feature_bit];
 			int size = xstate_sizes[feature_bit];
 
 			memcpy((void *)fx + offset,
@@ -533,8 +540,9 @@ static void do_extra_xstate_size_checks(void)
 	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
 }
 
+
 /*
- * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0/xfeatures_mask.
  *
  * Note the SDM's wording here.  "sub-function 0" only enumerates
  * the size of the *user* states.  If we use it to size a buffer
@@ -544,34 +552,33 @@ static void do_extra_xstate_size_checks(void)
  * Note that we do not currently set any bits on IA32_XSS so
  * 'XCR0 | IA32_XSS == XCR0' for now.
  */
-static unsigned int __init calculate_xstate_size(void)
+static unsigned int __init get_xsaves_size(void)
 {
 	unsigned int eax, ebx, ecx, edx;
-	unsigned int calculated_xstate_size;
+	/*
+	 * - CPUID function 0DH, sub-function 1:
+	 *    EBX enumerates the size (in bytes) required by
+	 *    the XSAVES instruction for an XSAVE area
+	 *    containing all the state components
+	 *    corresponding to bits currently set in
+	 *    XCR0 | IA32_XSS.
+	 */
+	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+	return ebx;
+}
 
-	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
-		/*
-		 * - CPUID function 0DH, sub-function 0:
-		 *    EBX enumerates the size (in bytes) required by
-		 *    the XSAVE instruction for an XSAVE area
-		 *    containing all the *user* state components
-		 *    corresponding to bits currently set in XCR0.
-		 */
-		cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-		calculated_xstate_size = ebx;
-	} else {
-		/*
-		 * - CPUID function 0DH, sub-function 1:
-		 *    EBX enumerates the size (in bytes) required by
-		 *    the XSAVES instruction for an XSAVE area
-		 *    containing all the state components
-		 *    corresponding to bits currently set in
-		 *    XCR0 | IA32_XSS.
-		 */
-		cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
-		calculated_xstate_size = ebx;
-	}
-	return calculated_xstate_size;
+static unsigned int __init get_xsave_size(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	/*
+	 * - CPUID function 0DH, sub-function 0:
+	 *    EBX enumerates the size (in bytes) required by
+	 *    the XSAVE instruction for an XSAVE area
+	 *    containing all the *user* state components
+	 *    corresponding to bits currently set in XCR0.
+	 */
+	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+	return ebx;
 }
 
 /*
@@ -591,7 +598,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
 static int init_xstate_size(void)
 {
 	/* Recompute the context size for enabled features: */
-	unsigned int possible_xstate_size = calculate_xstate_size();
+	unsigned int possible_xstate_size;
+	unsigned int xsave_size;
+
+	xsave_size = get_xsave_size();
+
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
+		possible_xstate_size = get_xsaves_size();
+	else
+		possible_xstate_size = xsave_size;
 
 	/* Ensure we have the space to store all enabled: */
 	if (!is_supported_xstate_size(possible_xstate_size))
@@ -603,6 +618,11 @@ static int init_xstate_size(void)
 	 */
 	xstate_size = possible_xstate_size;
 	do_extra_xstate_size_checks();
+
+	/*
+	 * User space is always in standard format.
+	 */
+	fpu_user_xstate_size = xsave_size;
 	return 0;
 }
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/4] x86/fpu/xstate: Rename xstate_size to fpu_kernel_xstate_size to distinguish from fpu_user_xstate_size
  2016-05-20 17:47 [PATCH 0/4] x86/fpu/state: Fix XSAVES issues - Part 1 Yu-cheng Yu
  2016-05-20 17:47 ` [PATCH 1/4] x86/fpu/xstate: Define and use fpu_user_xstate_size Yu-cheng Yu
@ 2016-05-20 17:47 ` Yu-cheng Yu
  2016-06-17 12:20   ` [tip:x86/fpu] x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size' tip-bot for Fenghua Yu
  2016-06-18  8:13   ` tip-bot for Fenghua Yu
  2016-05-20 17:47 ` [PATCH 3/4] x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization Yu-cheng Yu
  2016-05-20 17:47 ` [PATCH 4/4] x86/fpu/xstate: Copy xstate registers directly to signal frame when compacted format is in use Yu-cheng Yu
  3 siblings, 2 replies; 13+ messages in thread
From: Yu-cheng Yu @ 2016-05-20 17:47 UTC (permalink / raw)
  To: linux-kernel, x86, H. Peter Anvin, Thomas Gleixner, Ingo Molnar
  Cc: Dave Hansen, Andy Lutomirski, Sai Praneeth Prakhya,
	Ravi V. Shankar, Fenghua Yu, Yu-cheng Yu

From: Fenghua Yu <fenghua.yu@intel.com>

User space uses standard format xsave area. fpstate in signal frame
should have standard format size.

To explicitly distinguish between xstate size in kernel space and the
one in user space, we rename xstate_size to fpu_kernel_xstate_size.
This patch is not fixing a bug. It just makes kernel code more clear.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
[yu-cheng.yu@intel.com: rebase to current, rename to fpu_kernel_xstate_size]
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/processor.h |  2 +-
 arch/x86/kernel/fpu/core.c       |  7 ++++---
 arch/x86/kernel/fpu/init.c       | 20 +++++++++++---------
 arch/x86/kernel/fpu/signal.c     |  2 +-
 arch/x86/kernel/fpu/xstate.c     |  8 ++++----
 5 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0a16a16..965c5d2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -367,7 +367,7 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 #endif	/* X86_64 */
 
-extern unsigned int xstate_size;
+extern unsigned int fpu_kernel_xstate_size;
 extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 9702754..e41e1ec 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -222,7 +222,7 @@ void fpstate_init(union fpregs_state *state)
 		return;
 	}
 
-	memset(state, 0, xstate_size);
+	memset(state, 0, fpu_kernel_xstate_size);
 
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
@@ -247,7 +247,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * leak into the child task:
 	 */
 	if (use_eager_fpu())
-		memset(&dst_fpu->state.xsave, 0, xstate_size);
+		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
 	 * Save current FPU registers directly into the child
@@ -266,7 +266,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 */
 	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
+		memcpy(&src_fpu->state, &dst_fpu->state,
+		       fpu_kernel_xstate_size);
 
 		if (use_eager_fpu())
 			copy_kernel_to_fpregs(&src_fpu->state);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 5b1928c..60f3839 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void)
  * This is inherent to the XSAVE architecture which puts all state
  * components into a single, continuous memory block:
  */
-unsigned int xstate_size;
-EXPORT_SYMBOL_GPL(xstate_size);
+unsigned int fpu_kernel_xstate_size;
+EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
 
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
@@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void)
 	 * Add back the dynamically-calculated register state
 	 * size.
 	 */
-	task_size += xstate_size;
+	task_size += fpu_kernel_xstate_size;
 
 	/*
 	 * We dynamically size 'struct fpu', so we require that
@@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void)
 }
 
 /*
- * Set up the user and kernel xstate_size based on the legacy FPU context size.
+ * Set up the user and kernel xstate sizes based on the legacy FPU context size.
  *
  * We set this up first, and later it will be overwritten by
  * fpu__init_system_xstate() if the CPU knows about xstates.
@@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	on_boot_cpu = 0;
 
 	/*
-	 * Note that xstate_size might be overwriten later during
+	 * Note that xstate sizes might be overwritten later during
 	 * fpu__init_system_xstate().
 	 */
 
@@ -219,15 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 		 */
 		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-		xstate_size = sizeof(struct swregs_state);
+		fpu_kernel_xstate_size = sizeof(struct swregs_state);
 	} else {
 		if (boot_cpu_has(X86_FEATURE_FXSR))
-			xstate_size = sizeof(struct fxregs_state);
+			fpu_kernel_xstate_size =
+				sizeof(struct fxregs_state);
 		else
-			xstate_size = sizeof(struct fregs_state);
+			fpu_kernel_xstate_size =
+				sizeof(struct fregs_state);
 	}
 
-	fpu_user_xstate_size = xstate_size;
+	fpu_user_xstate_size = fpu_kernel_xstate_size;
 
 	/*
 	 * Quirk: we don't yet handle the XSAVES* instructions
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 23572a9..1454989 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -262,7 +262,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	int ia32_fxstate = (buf != buf_fx);
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;
-	int state_size = xstate_size;
+	int state_size = fpu_kernel_xstate_size;
 	u64 xfeatures = 0;
 	int fx_only = 0;
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9c4da35..46abfaf 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -537,7 +537,7 @@ static void do_extra_xstate_size_checks(void)
 		 */
 		paranoid_xstate_size += xfeature_size(i);
 	}
-	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
+	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
 }
 
 
@@ -616,7 +616,7 @@ static int init_xstate_size(void)
 	 * The size is OK, we are definitely going to use xsave,
 	 * make it known to the world that we need more space.
 	 */
-	xstate_size = possible_xstate_size;
+	fpu_kernel_xstate_size = possible_xstate_size;
 	do_extra_xstate_size_checks();
 
 	/*
@@ -679,14 +679,14 @@ void __init fpu__init_system_xstate(void)
 		return;
 	}
 
-	update_regset_xstate_info(xstate_size, xfeatures_mask);
+	update_regset_xstate_info(fpu_kernel_xstate_size, xfeatures_mask);
 	fpu__init_prepare_fx_sw_frame();
 	setup_init_fpu_buf();
 	setup_xstate_comp();
 
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask,
-		xstate_size,
+		fpu_kernel_xstate_size,
 		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 }
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/4] x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization
  2016-05-20 17:47 [PATCH 0/4] x86/fpu/state: Fix XSAVES issues - Part 1 Yu-cheng Yu
  2016-05-20 17:47 ` [PATCH 1/4] x86/fpu/xstate: Define and use fpu_user_xstate_size Yu-cheng Yu
  2016-05-20 17:47 ` [PATCH 2/4] x86/fpu/xstate: Rename xstate_size to fpu_kernel_xstate_size to distinguish from fpu_user_xstate_size Yu-cheng Yu
@ 2016-05-20 17:47 ` Yu-cheng Yu
  2016-06-17 12:21   ` [tip:x86/fpu] " tip-bot for Fenghua Yu
  2016-06-18  8:13   ` tip-bot for Fenghua Yu
  2016-05-20 17:47 ` [PATCH 4/4] x86/fpu/xstate: Copy xstate registers directly to signal frame when compacted format is in use Yu-cheng Yu
  3 siblings, 2 replies; 13+ messages in thread
From: Yu-cheng Yu @ 2016-05-20 17:47 UTC (permalink / raw)
  To: linux-kernel, x86, H. Peter Anvin, Thomas Gleixner, Ingo Molnar
  Cc: Dave Hansen, Andy Lutomirski, Sai Praneeth Prakhya,
	Ravi V. Shankar, Fenghua Yu, Yu-cheng Yu

From: Fenghua Yu <fenghua.yu@intel.com>

Keep init_fpstate.xsave.header.xfeatures as zero for init optimization.
This is important for init optimization that is implemented in processor.
If a bit corresponding to an xstate in xstate_bv is 0, it means the
xstate is in init status and will not be read from memory to the processor
during XRSTOR/XRSTORS instruction. This largely impacts context switch
performance.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/kernel/fpu/xstate.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 46abfaf..dbfef1b 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -329,13 +329,11 @@ static void __init setup_init_fpu_buf(void)
 	setup_xstate_features();
 	print_xstate_features();
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
-		init_fpstate.xsave.header.xfeatures = xfeatures_mask;
-	}
 
 	/*
-	 * Init all the features state with header_bv being 0x0
+	 * Init all the features state with header.xfeatures being 0x0
 	 */
 	copy_kernel_to_xregs_booting(&init_fpstate.xsave);
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/4] x86/fpu/xstate: Copy xstate registers directly to signal frame when compacted format is in use
  2016-05-20 17:47 [PATCH 0/4] x86/fpu/state: Fix XSAVES issues - Part 1 Yu-cheng Yu
                   ` (2 preceding siblings ...)
  2016-05-20 17:47 ` [PATCH 3/4] x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization Yu-cheng Yu
@ 2016-05-20 17:47 ` Yu-cheng Yu
  2016-06-17 12:21   ` [tip:x86/fpu] x86/fpu/xstate: Copy xstate registers directly to the " tip-bot for Yu-cheng Yu
  2016-06-18  8:14   ` tip-bot for Yu-cheng Yu
  3 siblings, 2 replies; 13+ messages in thread
From: Yu-cheng Yu @ 2016-05-20 17:47 UTC (permalink / raw)
  To: linux-kernel, x86, H. Peter Anvin, Thomas Gleixner, Ingo Molnar
  Cc: Dave Hansen, Andy Lutomirski, Sai Praneeth Prakhya,
	Ravi V. Shankar, Fenghua Yu, Yu-cheng Yu

XSAVES is a kernel instruction and uses a compacted format. When working
with user space, the kernel should provide standard-format, non-supervisor
state data. We cannot do __copy_to_user() from a compacted-format kernel
xstate area to a signal frame.

Dave Hansen proposes this method to simplify copy xstate directly to user.
Based on an earlier patch from Fenghua Yu <fenghua.yu@intel.com>

Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/fpu/xstate.h | 1 +
 arch/x86/kernel/fpu/signal.c      | 3 ++-
 arch/x86/kernel/fpu/xstate.c      | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 16df2c4..d812cf3 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -47,5 +47,6 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
+int using_compacted_format(void);
 
 #endif
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 1454989..9ce2963 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -8,6 +8,7 @@
 #include <asm/fpu/internal.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
+#include <asm/fpu/xstate.h>
 
 #include <asm/sigframe.h>
 
@@ -168,7 +169,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpregs_active()) {
+	if (fpregs_active() || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index dbfef1b..0b01f00 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -420,7 +420,7 @@ static int xfeature_size(int xfeature_nr)
  * that it is obvious which aspect of 'XSAVES' is being handled
  * by the calling code.
  */
-static int using_compacted_format(void)
+int using_compacted_format(void)
 {
 	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/fpu] x86/fpu/xstate: Define and use 'fpu_user_xstate_size'
  2016-05-20 17:47 ` [PATCH 1/4] x86/fpu/xstate: Define and use fpu_user_xstate_size Yu-cheng Yu
@ 2016-06-17 12:20   ` tip-bot for Fenghua Yu
  2016-06-18  8:12   ` tip-bot for Fenghua Yu
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Fenghua Yu @ 2016-06-17 12:20 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, dave.hansen, mingo, hpa, brgerst, sai.praneeth.prakhya,
	dvlasenk, oleg, quentin.casasnovas, linux-kernel, fenghua.yu,
	torvalds, yu-cheng.yu, luto, dave.hansen, ravi.v.shankar, tglx,
	peterz

Commit-ID:  4543ea7e282d313b48cd34bbb9dc89c1dbdd13a7
Gitweb:     http://git.kernel.org/tip/4543ea7e282d313b48cd34bbb9dc89c1dbdd13a7
Author:     Fenghua Yu <fenghua.yu@intel.com>
AuthorDate: Fri, 20 May 2016 10:47:05 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 17 Jun 2016 10:10:22 +0200

x86/fpu/xstate: Define and use 'fpu_user_xstate_size'

The kernel xstate area can be in standard or compacted format;
it is always in standard format for user mode. When XSAVES is
enabled, the kernel uses the compacted format and it is necessary
to use a separate fpu_user_xstate_size for signal/ptrace frames.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
[ Rebased the patch and cleaned up the naming. ]
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/8756ec34dabddfc727cda5743195eb81e8caf91c.1463760376.git.yu-cheng.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  1 -
 arch/x86/include/asm/processor.h  |  1 +
 arch/x86/kernel/fpu/init.c        |  5 ++-
 arch/x86/kernel/fpu/signal.c      | 27 ++++++++++----
 arch/x86/kernel/fpu/xstate.c      | 76 ++++++++++++++++++++++++---------------
 5 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 38951b0..16df2c4 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -39,7 +39,6 @@
 #define REX_PREFIX
 #endif
 
-extern unsigned int xstate_size;
 extern u64 xfeatures_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 62c6cc3..0a16a16 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -368,6 +368,7 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 #endif	/* X86_64 */
 
 extern unsigned int xstate_size;
+extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index aacfd7a..5b1928c 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void)
 }
 
 /*
- * Set up the xstate_size based on the legacy FPU context size.
+ * Set up the user and kernel xstate_size based on the legacy FPU context size.
  *
  * We set this up first, and later it will be overwritten by
  * fpu__init_system_xstate() if the CPU knows about xstates.
@@ -226,6 +226,9 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 		else
 			xstate_size = sizeof(struct fregs_state);
 	}
+
+	fpu_user_xstate_size = xstate_size;
+
 	/*
 	 * Quirk: we don't yet handle the XSAVES* instructions
 	 * correctly, as we don't correctly convert between
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index c6f2a3c..0d29d4d 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -32,7 +32,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf,
 	/* Check for the first magic field and other error scenarios. */
 	if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
 	    fx_sw->xstate_size < min_xstate_size ||
-	    fx_sw->xstate_size > xstate_size ||
+	    fx_sw->xstate_size > fpu_user_xstate_size ||
 	    fx_sw->xstate_size > fx_sw->extended_size)
 		return -1;
 
@@ -89,7 +89,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 	if (!use_xsave())
 		return err;
 
-	err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+	err |= __put_user(FP_XSTATE_MAGIC2,
+			  (__u32 *)(buf + fpu_user_xstate_size));
 
 	/*
 	 * Read the xfeatures which we copied (directly from the cpu or
@@ -126,7 +127,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
 	else
 		err = copy_fregs_to_user((struct fregs_state __user *) buf);
 
-	if (unlikely(err) && __clear_user(buf, xstate_size))
+	if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size))
 		err = -EFAULT;
 	return err;
 }
@@ -176,8 +177,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 		if (ia32_fxstate)
 			copy_fxregs_to_kernel(&tsk->thread.fpu);
 	} else {
+		/*
+		 * It is a *bug* if kernel uses compacted-format for xsave
+		 * area and we copy it out directly to a signal frame. It
+		 * should have been handled above by saving the registers
+		 * directly.
+		 */
+		if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+			WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n");
+			return -1;
+		}
+
 		fpstate_sanitize_xstate(&tsk->thread.fpu);
-		if (__copy_to_user(buf_fx, xsave, xstate_size))
+		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
 			return -1;
 	}
 
@@ -344,7 +356,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 
 static inline int xstate_sigframe_size(void)
 {
-	return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+	return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
+			fpu_user_xstate_size;
 }
 
 /*
@@ -388,12 +401,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
  */
 void fpu__init_prepare_fx_sw_frame(void)
 {
-	int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+	int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE;
 
 	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
 	fx_sw_reserved.extended_size = size;
 	fx_sw_reserved.xfeatures = xfeatures_mask;
-	fx_sw_reserved.xstate_size = xstate_size;
+	fx_sw_reserved.xstate_size = fpu_user_xstate_size;
 
 	if (config_enabled(CONFIG_IA32_EMULATION) ||
 	    config_enabled(CONFIG_X86_32)) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 4ea2a59..9c4da35 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -44,6 +44,13 @@ static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] =
 static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
 
 /*
+ * The XSAVE area of kernel can be in standard or compacted format;
+ * it is always in standard format for user mode. This is the user
+ * mode standard format size used for signal and ptrace frames.
+ */
+unsigned int fpu_user_xstate_size;
+
+/*
  * Clear all of the X86_FEATURE_* bits that are unavailable
  * when the CPU has no XSAVE support.
  */
@@ -171,7 +178,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
 	 */
 	while (xfeatures) {
 		if (xfeatures & 0x1) {
-			int offset = xstate_offsets[feature_bit];
+			int offset = xstate_comp_offsets[feature_bit];
 			int size = xstate_sizes[feature_bit];
 
 			memcpy((void *)fx + offset,
@@ -533,8 +540,9 @@ static void do_extra_xstate_size_checks(void)
 	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
 }
 
+
 /*
- * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0/xfeatures_mask.
  *
  * Note the SDM's wording here.  "sub-function 0" only enumerates
  * the size of the *user* states.  If we use it to size a buffer
@@ -544,34 +552,33 @@ static void do_extra_xstate_size_checks(void)
  * Note that we do not currently set any bits on IA32_XSS so
  * 'XCR0 | IA32_XSS == XCR0' for now.
  */
-static unsigned int __init calculate_xstate_size(void)
+static unsigned int __init get_xsaves_size(void)
 {
 	unsigned int eax, ebx, ecx, edx;
-	unsigned int calculated_xstate_size;
+	/*
+	 * - CPUID function 0DH, sub-function 1:
+	 *    EBX enumerates the size (in bytes) required by
+	 *    the XSAVES instruction for an XSAVE area
+	 *    containing all the state components
+	 *    corresponding to bits currently set in
+	 *    XCR0 | IA32_XSS.
+	 */
+	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+	return ebx;
+}
 
-	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
-		/*
-		 * - CPUID function 0DH, sub-function 0:
-		 *    EBX enumerates the size (in bytes) required by
-		 *    the XSAVE instruction for an XSAVE area
-		 *    containing all the *user* state components
-		 *    corresponding to bits currently set in XCR0.
-		 */
-		cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-		calculated_xstate_size = ebx;
-	} else {
-		/*
-		 * - CPUID function 0DH, sub-function 1:
-		 *    EBX enumerates the size (in bytes) required by
-		 *    the XSAVES instruction for an XSAVE area
-		 *    containing all the state components
-		 *    corresponding to bits currently set in
-		 *    XCR0 | IA32_XSS.
-		 */
-		cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
-		calculated_xstate_size = ebx;
-	}
-	return calculated_xstate_size;
+static unsigned int __init get_xsave_size(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	/*
+	 * - CPUID function 0DH, sub-function 0:
+	 *    EBX enumerates the size (in bytes) required by
+	 *    the XSAVE instruction for an XSAVE area
+	 *    containing all the *user* state components
+	 *    corresponding to bits currently set in XCR0.
+	 */
+	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+	return ebx;
 }
 
 /*
@@ -591,7 +598,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
 static int init_xstate_size(void)
 {
 	/* Recompute the context size for enabled features: */
-	unsigned int possible_xstate_size = calculate_xstate_size();
+	unsigned int possible_xstate_size;
+	unsigned int xsave_size;
+
+	xsave_size = get_xsave_size();
+
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
+		possible_xstate_size = get_xsaves_size();
+	else
+		possible_xstate_size = xsave_size;
 
 	/* Ensure we have the space to store all enabled: */
 	if (!is_supported_xstate_size(possible_xstate_size))
@@ -603,6 +618,11 @@ static int init_xstate_size(void)
 	 */
 	xstate_size = possible_xstate_size;
 	do_extra_xstate_size_checks();
+
+	/*
+	 * User space is always in standard format.
+	 */
+	fpu_user_xstate_size = xsave_size;
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/fpu] x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size'
  2016-05-20 17:47 ` [PATCH 2/4] x86/fpu/xstate: Rename xstate_size to fpu_kernel_xstate_size to distinguish from fpu_user_xstate_size Yu-cheng Yu
@ 2016-06-17 12:20   ` tip-bot for Fenghua Yu
  2016-06-18  8:13   ` tip-bot for Fenghua Yu
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Fenghua Yu @ 2016-06-17 12:20 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dave.hansen, brgerst, peterz, yu-cheng.yu, quentin.casasnovas,
	torvalds, dvlasenk, dave.hansen, linux-kernel, tglx, mingo, bp,
	sai.praneeth.prakhya, ravi.v.shankar, fenghua.yu, luto, oleg,
	hpa

Commit-ID:  63a5db07a03947218e5f4fb0776df6b6ca328287
Gitweb:     http://git.kernel.org/tip/63a5db07a03947218e5f4fb0776df6b6ca328287
Author:     Fenghua Yu <fenghua.yu@intel.com>
AuthorDate: Fri, 20 May 2016 10:47:06 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 17 Jun 2016 10:10:22 +0200

x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size'

User space uses standard format xsave area. fpstate in signal frame
should have standard format size.

To explicitly distinguish between xstate size in kernel space and the
one in user space, we rename 'xstate_size' to 'fpu_kernel_xstate_size'.

Cleanup only, no change in functionality.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
[ Rebased the patch and cleaned up the naming. ]
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2ecbae347a5152d94be52adf7d0f3b7305d90d99.1463760376.git.yu-cheng.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/processor.h |  2 +-
 arch/x86/kernel/fpu/core.c       |  7 ++++---
 arch/x86/kernel/fpu/init.c       | 20 +++++++++++---------
 arch/x86/kernel/fpu/signal.c     |  2 +-
 arch/x86/kernel/fpu/xstate.c     |  8 ++++----
 5 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0a16a16..965c5d2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -367,7 +367,7 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 #endif	/* X86_64 */
 
-extern unsigned int xstate_size;
+extern unsigned int fpu_kernel_xstate_size;
 extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 7d56474..c759bd0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -227,7 +227,7 @@ void fpstate_init(union fpregs_state *state)
 		return;
 	}
 
-	memset(state, 0, xstate_size);
+	memset(state, 0, fpu_kernel_xstate_size);
 
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
@@ -252,7 +252,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * leak into the child task:
 	 */
 	if (use_eager_fpu())
-		memset(&dst_fpu->state.xsave, 0, xstate_size);
+		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
 	 * Save current FPU registers directly into the child
@@ -271,7 +271,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 */
 	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
+		memcpy(&src_fpu->state, &dst_fpu->state,
+		       fpu_kernel_xstate_size);
 
 		if (use_eager_fpu())
 			copy_kernel_to_fpregs(&src_fpu->state);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 5b1928c..60f3839 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void)
  * This is inherent to the XSAVE architecture which puts all state
  * components into a single, continuous memory block:
  */
-unsigned int xstate_size;
-EXPORT_SYMBOL_GPL(xstate_size);
+unsigned int fpu_kernel_xstate_size;
+EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
 
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
@@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void)
 	 * Add back the dynamically-calculated register state
 	 * size.
 	 */
-	task_size += xstate_size;
+	task_size += fpu_kernel_xstate_size;
 
 	/*
 	 * We dynamically size 'struct fpu', so we require that
@@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void)
 }
 
 /*
- * Set up the user and kernel xstate_size based on the legacy FPU context size.
+ * Set up the user and kernel xstate sizes based on the legacy FPU context size.
  *
  * We set this up first, and later it will be overwritten by
  * fpu__init_system_xstate() if the CPU knows about xstates.
@@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	on_boot_cpu = 0;
 
 	/*
-	 * Note that xstate_size might be overwriten later during
+	 * Note that xstate sizes might be overwritten later during
 	 * fpu__init_system_xstate().
 	 */
 
@@ -219,15 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 		 */
 		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-		xstate_size = sizeof(struct swregs_state);
+		fpu_kernel_xstate_size = sizeof(struct swregs_state);
 	} else {
 		if (boot_cpu_has(X86_FEATURE_FXSR))
-			xstate_size = sizeof(struct fxregs_state);
+			fpu_kernel_xstate_size =
+				sizeof(struct fxregs_state);
 		else
-			xstate_size = sizeof(struct fregs_state);
+			fpu_kernel_xstate_size =
+				sizeof(struct fregs_state);
 	}
 
-	fpu_user_xstate_size = xstate_size;
+	fpu_user_xstate_size = fpu_kernel_xstate_size;
 
 	/*
 	 * Quirk: we don't yet handle the XSAVES* instructions
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 0d29d4d..06d80f6 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -263,7 +263,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	int ia32_fxstate = (buf != buf_fx);
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;
-	int state_size = xstate_size;
+	int state_size = fpu_kernel_xstate_size;
 	u64 xfeatures = 0;
 	int fx_only = 0;
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9c4da35..46abfaf 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -537,7 +537,7 @@ static void do_extra_xstate_size_checks(void)
 		 */
 		paranoid_xstate_size += xfeature_size(i);
 	}
-	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
+	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
 }
 
 
@@ -616,7 +616,7 @@ static int init_xstate_size(void)
 	 * The size is OK, we are definitely going to use xsave,
 	 * make it known to the world that we need more space.
 	 */
-	xstate_size = possible_xstate_size;
+	fpu_kernel_xstate_size = possible_xstate_size;
 	do_extra_xstate_size_checks();
 
 	/*
@@ -679,14 +679,14 @@ void __init fpu__init_system_xstate(void)
 		return;
 	}
 
-	update_regset_xstate_info(xstate_size, xfeatures_mask);
+	update_regset_xstate_info(fpu_kernel_xstate_size, xfeatures_mask);
 	fpu__init_prepare_fx_sw_frame();
 	setup_init_fpu_buf();
 	setup_xstate_comp();
 
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask,
-		xstate_size,
+		fpu_kernel_xstate_size,
 		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 }
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/fpu] x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization
  2016-05-20 17:47 ` [PATCH 3/4] x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization Yu-cheng Yu
@ 2016-06-17 12:21   ` tip-bot for Fenghua Yu
  2016-06-18  8:13   ` tip-bot for Fenghua Yu
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Fenghua Yu @ 2016-06-17 12:21 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, yu-cheng.yu, oleg, sai.praneeth.prakhya, tglx,
	bp, fenghua.yu, brgerst, dvlasenk, dave.hansen, dave.hansen,
	torvalds, ravi.v.shankar, mingo, luto, quentin.casasnovas,
	peterz

Commit-ID:  2729818f35c9b1a1614624e2edcd3e80c59c8689
Gitweb:     http://git.kernel.org/tip/2729818f35c9b1a1614624e2edcd3e80c59c8689
Author:     Fenghua Yu <fenghua.yu@intel.com>
AuthorDate: Fri, 20 May 2016 10:47:07 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 17 Jun 2016 10:10:23 +0200

x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization

Keep init_fpstate.xsave.header.xfeatures as zero for init optimization.
This is important for init optimization that is implemented in processor.
If a bit corresponding to an xstate in xstate_bv is 0, it means the
xstate is in init status and will not be read from memory to the processor
during XRSTOR/XRSTORS instruction. This largely impacts context switch
performance.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2fb4ec7f18b76e8cda057a8c0038def74a9b8044.1463760376.git.yu-cheng.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 46abfaf..dbfef1b 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -329,13 +329,11 @@ static void __init setup_init_fpu_buf(void)
 	setup_xstate_features();
 	print_xstate_features();
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
-		init_fpstate.xsave.header.xfeatures = xfeatures_mask;
-	}
 
 	/*
-	 * Init all the features state with header_bv being 0x0
+	 * Init all the features state with header.xfeatures being 0x0
 	 */
 	copy_kernel_to_xregs_booting(&init_fpstate.xsave);
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/fpu] x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use
  2016-05-20 17:47 ` [PATCH 4/4] x86/fpu/xstate: Copy xstate registers directly to signal frame when compacted format is in use Yu-cheng Yu
@ 2016-06-17 12:21   ` tip-bot for Yu-cheng Yu
  2016-06-18  8:14   ` tip-bot for Yu-cheng Yu
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Yu-cheng Yu @ 2016-06-17 12:21 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: torvalds, peterz, brgerst, dave.hansen, hpa, quentin.casasnovas,
	oleg, tglx, yu-cheng.yu, luto, linux-kernel, dave.hansen, mingo,
	sai.praneeth.prakhya, bp, fenghua.yu, ravi.v.shankar, dvlasenk

Commit-ID:  f605eb549cb2624ab36ea8d7847bc016f5dc31ce
Gitweb:     http://git.kernel.org/tip/f605eb549cb2624ab36ea8d7847bc016f5dc31ce
Author:     Yu-cheng Yu <yu-cheng.yu@intel.com>
AuthorDate: Fri, 20 May 2016 10:47:08 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Fri, 17 Jun 2016 10:10:24 +0200

x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use

XSAVES is a kernel instruction and uses a compacted format. When working
with user space, the kernel should provide standard-format, non-supervisor
state data. We cannot do __copy_to_user() from a compacted-format kernel
xstate area to a signal frame.

Dave Hansen proposes this method to simplify copy xstate directly to user.

This patch is based on an earlier patch from Fenghua Yu <fenghua.yu@intel.com>

Originally-from: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c36f419d525517d04209a28dd8e1e5af9000036e.1463760376.git.yu-cheng.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h | 1 +
 arch/x86/kernel/fpu/signal.c      | 3 ++-
 arch/x86/kernel/fpu/xstate.c      | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 16df2c4..d812cf3 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -47,5 +47,6 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
+int using_compacted_format(void);
 
 #endif
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 06d80f6..8aa96cb 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -8,6 +8,7 @@
 #include <asm/fpu/internal.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
+#include <asm/fpu/xstate.h>
 
 #include <asm/sigframe.h>
 #include <asm/trace/fpu.h>
@@ -169,7 +170,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpregs_active()) {
+	if (fpregs_active() || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index dbfef1b..0b01f00 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -420,7 +420,7 @@ static int xfeature_size(int xfeature_nr)
  * that it is obvious which aspect of 'XSAVES' is being handled
  * by the calling code.
  */
-static int using_compacted_format(void)
+int using_compacted_format(void)
 {
 	return boot_cpu_has(X86_FEATURE_XSAVES);
 }

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/fpu] x86/fpu/xstate: Define and use 'fpu_user_xstate_size'
  2016-05-20 17:47 ` [PATCH 1/4] x86/fpu/xstate: Define and use fpu_user_xstate_size Yu-cheng Yu
  2016-06-17 12:20   ` [tip:x86/fpu] x86/fpu/xstate: Define and use 'fpu_user_xstate_size' tip-bot for Fenghua Yu
@ 2016-06-18  8:12   ` tip-bot for Fenghua Yu
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Fenghua Yu @ 2016-06-18  8:12 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: fenghua.yu, dave.hansen, luto, mingo, peterz, torvalds,
	quentin.casasnovas, oleg, hpa, sai.praneeth.prakhya,
	ravi.v.shankar, yu-cheng.yu, tglx, dvlasenk, linux-kernel,
	dave.hansen, bp, brgerst

Commit-ID:  a1141e0b5ca6ee3e5e35d5f1a310a5ecb9c96ce5
Gitweb:     http://git.kernel.org/tip/a1141e0b5ca6ee3e5e35d5f1a310a5ecb9c96ce5
Author:     Fenghua Yu <fenghua.yu@intel.com>
AuthorDate: Fri, 20 May 2016 10:47:05 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 18 Jun 2016 10:10:18 +0200

x86/fpu/xstate: Define and use 'fpu_user_xstate_size'

The kernel xstate area can be in standard or compacted format;
it is always in standard format for user mode. When XSAVES is
enabled, the kernel uses the compacted format and it is necessary
to use a separate fpu_user_xstate_size for signal/ptrace frames.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
[ Rebased the patch and cleaned up the naming. ]
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/8756ec34dabddfc727cda5743195eb81e8caf91c.1463760376.git.yu-cheng.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  1 -
 arch/x86/include/asm/processor.h  |  1 +
 arch/x86/kernel/fpu/init.c        |  5 ++-
 arch/x86/kernel/fpu/signal.c      | 27 ++++++++++----
 arch/x86/kernel/fpu/xstate.c      | 76 ++++++++++++++++++++++++---------------
 5 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 38951b0..16df2c4 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -39,7 +39,6 @@
 #define REX_PREFIX
 #endif
 
-extern unsigned int xstate_size;
 extern u64 xfeatures_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 62c6cc3..0a16a16 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -368,6 +368,7 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 #endif	/* X86_64 */
 
 extern unsigned int xstate_size;
+extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index aacfd7a..5b1928c 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void)
 }
 
 /*
- * Set up the xstate_size based on the legacy FPU context size.
+ * Set up the user and kernel xstate_size based on the legacy FPU context size.
  *
  * We set this up first, and later it will be overwritten by
  * fpu__init_system_xstate() if the CPU knows about xstates.
@@ -226,6 +226,9 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 		else
 			xstate_size = sizeof(struct fregs_state);
 	}
+
+	fpu_user_xstate_size = xstate_size;
+
 	/*
 	 * Quirk: we don't yet handle the XSAVES* instructions
 	 * correctly, as we don't correctly convert between
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index c6f2a3c..0d29d4d 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -32,7 +32,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf,
 	/* Check for the first magic field and other error scenarios. */
 	if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
 	    fx_sw->xstate_size < min_xstate_size ||
-	    fx_sw->xstate_size > xstate_size ||
+	    fx_sw->xstate_size > fpu_user_xstate_size ||
 	    fx_sw->xstate_size > fx_sw->extended_size)
 		return -1;
 
@@ -89,7 +89,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 	if (!use_xsave())
 		return err;
 
-	err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+	err |= __put_user(FP_XSTATE_MAGIC2,
+			  (__u32 *)(buf + fpu_user_xstate_size));
 
 	/*
 	 * Read the xfeatures which we copied (directly from the cpu or
@@ -126,7 +127,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
 	else
 		err = copy_fregs_to_user((struct fregs_state __user *) buf);
 
-	if (unlikely(err) && __clear_user(buf, xstate_size))
+	if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size))
 		err = -EFAULT;
 	return err;
 }
@@ -176,8 +177,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 		if (ia32_fxstate)
 			copy_fxregs_to_kernel(&tsk->thread.fpu);
 	} else {
+		/*
+		 * It is a *bug* if kernel uses compacted-format for xsave
+		 * area and we copy it out directly to a signal frame. It
+		 * should have been handled above by saving the registers
+		 * directly.
+		 */
+		if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+			WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n");
+			return -1;
+		}
+
 		fpstate_sanitize_xstate(&tsk->thread.fpu);
-		if (__copy_to_user(buf_fx, xsave, xstate_size))
+		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
 			return -1;
 	}
 
@@ -344,7 +356,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 
 static inline int xstate_sigframe_size(void)
 {
-	return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+	return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
+			fpu_user_xstate_size;
 }
 
 /*
@@ -388,12 +401,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
  */
 void fpu__init_prepare_fx_sw_frame(void)
 {
-	int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+	int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE;
 
 	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
 	fx_sw_reserved.extended_size = size;
 	fx_sw_reserved.xfeatures = xfeatures_mask;
-	fx_sw_reserved.xstate_size = xstate_size;
+	fx_sw_reserved.xstate_size = fpu_user_xstate_size;
 
 	if (config_enabled(CONFIG_IA32_EMULATION) ||
 	    config_enabled(CONFIG_X86_32)) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 4ea2a59..9c4da35 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -44,6 +44,13 @@ static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] =
 static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
 
 /*
+ * The XSAVE area of kernel can be in standard or compacted format;
+ * it is always in standard format for user mode. This is the user
+ * mode standard format size used for signal and ptrace frames.
+ */
+unsigned int fpu_user_xstate_size;
+
+/*
  * Clear all of the X86_FEATURE_* bits that are unavailable
  * when the CPU has no XSAVE support.
  */
@@ -171,7 +178,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
 	 */
 	while (xfeatures) {
 		if (xfeatures & 0x1) {
-			int offset = xstate_offsets[feature_bit];
+			int offset = xstate_comp_offsets[feature_bit];
 			int size = xstate_sizes[feature_bit];
 
 			memcpy((void *)fx + offset,
@@ -533,8 +540,9 @@ static void do_extra_xstate_size_checks(void)
 	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
 }
 
+
 /*
- * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0/xfeatures_mask.
  *
  * Note the SDM's wording here.  "sub-function 0" only enumerates
  * the size of the *user* states.  If we use it to size a buffer
@@ -544,34 +552,33 @@ static void do_extra_xstate_size_checks(void)
  * Note that we do not currently set any bits on IA32_XSS so
  * 'XCR0 | IA32_XSS == XCR0' for now.
  */
-static unsigned int __init calculate_xstate_size(void)
+static unsigned int __init get_xsaves_size(void)
 {
 	unsigned int eax, ebx, ecx, edx;
-	unsigned int calculated_xstate_size;
+	/*
+	 * - CPUID function 0DH, sub-function 1:
+	 *    EBX enumerates the size (in bytes) required by
+	 *    the XSAVES instruction for an XSAVE area
+	 *    containing all the state components
+	 *    corresponding to bits currently set in
+	 *    XCR0 | IA32_XSS.
+	 */
+	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+	return ebx;
+}
 
-	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
-		/*
-		 * - CPUID function 0DH, sub-function 0:
-		 *    EBX enumerates the size (in bytes) required by
-		 *    the XSAVE instruction for an XSAVE area
-		 *    containing all the *user* state components
-		 *    corresponding to bits currently set in XCR0.
-		 */
-		cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-		calculated_xstate_size = ebx;
-	} else {
-		/*
-		 * - CPUID function 0DH, sub-function 1:
-		 *    EBX enumerates the size (in bytes) required by
-		 *    the XSAVES instruction for an XSAVE area
-		 *    containing all the state components
-		 *    corresponding to bits currently set in
-		 *    XCR0 | IA32_XSS.
-		 */
-		cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
-		calculated_xstate_size = ebx;
-	}
-	return calculated_xstate_size;
+static unsigned int __init get_xsave_size(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	/*
+	 * - CPUID function 0DH, sub-function 0:
+	 *    EBX enumerates the size (in bytes) required by
+	 *    the XSAVE instruction for an XSAVE area
+	 *    containing all the *user* state components
+	 *    corresponding to bits currently set in XCR0.
+	 */
+	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+	return ebx;
 }
 
 /*
@@ -591,7 +598,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
 static int init_xstate_size(void)
 {
 	/* Recompute the context size for enabled features: */
-	unsigned int possible_xstate_size = calculate_xstate_size();
+	unsigned int possible_xstate_size;
+	unsigned int xsave_size;
+
+	xsave_size = get_xsave_size();
+
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
+		possible_xstate_size = get_xsaves_size();
+	else
+		possible_xstate_size = xsave_size;
 
 	/* Ensure we have the space to store all enabled: */
 	if (!is_supported_xstate_size(possible_xstate_size))
@@ -603,6 +618,11 @@ static int init_xstate_size(void)
 	 */
 	xstate_size = possible_xstate_size;
 	do_extra_xstate_size_checks();
+
+	/*
+	 * User space is always in standard format.
+	 */
+	fpu_user_xstate_size = xsave_size;
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/fpu] x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size'
  2016-05-20 17:47 ` [PATCH 2/4] x86/fpu/xstate: Rename xstate_size to fpu_kernel_xstate_size to distinguish from fpu_user_xstate_size Yu-cheng Yu
  2016-06-17 12:20   ` [tip:x86/fpu] x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size' tip-bot for Fenghua Yu
@ 2016-06-18  8:13   ` tip-bot for Fenghua Yu
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Fenghua Yu @ 2016-06-18  8:13 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, bp, mingo, oleg, yu-cheng.yu, fenghua.yu, torvalds,
	dvlasenk, dave.hansen, tglx, sai.praneeth.prakhya,
	quentin.casasnovas, dave.hansen, brgerst, hpa, ravi.v.shankar,
	luto, linux-kernel

Commit-ID:  bf15a8cf8d14879b785c548728415d36ccb6a33b
Gitweb:     http://git.kernel.org/tip/bf15a8cf8d14879b785c548728415d36ccb6a33b
Author:     Fenghua Yu <fenghua.yu@intel.com>
AuthorDate: Fri, 20 May 2016 10:47:06 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 18 Jun 2016 10:10:18 +0200

x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size'

User space uses standard format xsave area. fpstate in signal frame
should have standard format size.

To explicitly distinguish between xstate size in kernel space and the
one in user space, we rename 'xstate_size' to 'fpu_kernel_xstate_size'.

Cleanup only, no change in functionality.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
[ Rebased the patch and cleaned up the naming. ]
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2ecbae347a5152d94be52adf7d0f3b7305d90d99.1463760376.git.yu-cheng.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/processor.h |  2 +-
 arch/x86/kernel/fpu/core.c       |  7 ++++---
 arch/x86/kernel/fpu/init.c       | 20 +++++++++++---------
 arch/x86/kernel/fpu/signal.c     |  2 +-
 arch/x86/kernel/fpu/xstate.c     |  8 ++++----
 5 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0a16a16..965c5d2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -367,7 +367,7 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 #endif	/* X86_64 */
 
-extern unsigned int xstate_size;
+extern unsigned int fpu_kernel_xstate_size;
 extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 7d56474..c759bd0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -227,7 +227,7 @@ void fpstate_init(union fpregs_state *state)
 		return;
 	}
 
-	memset(state, 0, xstate_size);
+	memset(state, 0, fpu_kernel_xstate_size);
 
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
@@ -252,7 +252,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * leak into the child task:
 	 */
 	if (use_eager_fpu())
-		memset(&dst_fpu->state.xsave, 0, xstate_size);
+		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
 	 * Save current FPU registers directly into the child
@@ -271,7 +271,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 */
 	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
+		memcpy(&src_fpu->state, &dst_fpu->state,
+		       fpu_kernel_xstate_size);
 
 		if (use_eager_fpu())
 			copy_kernel_to_fpregs(&src_fpu->state);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 5b1928c..60f3839 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void)
  * This is inherent to the XSAVE architecture which puts all state
  * components into a single, continuous memory block:
  */
-unsigned int xstate_size;
-EXPORT_SYMBOL_GPL(xstate_size);
+unsigned int fpu_kernel_xstate_size;
+EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
 
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
@@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void)
 	 * Add back the dynamically-calculated register state
 	 * size.
 	 */
-	task_size += xstate_size;
+	task_size += fpu_kernel_xstate_size;
 
 	/*
 	 * We dynamically size 'struct fpu', so we require that
@@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void)
 }
 
 /*
- * Set up the user and kernel xstate_size based on the legacy FPU context size.
+ * Set up the user and kernel xstate sizes based on the legacy FPU context size.
  *
  * We set this up first, and later it will be overwritten by
  * fpu__init_system_xstate() if the CPU knows about xstates.
@@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	on_boot_cpu = 0;
 
 	/*
-	 * Note that xstate_size might be overwriten later during
+	 * Note that xstate sizes might be overwritten later during
 	 * fpu__init_system_xstate().
 	 */
 
@@ -219,15 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 		 */
 		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-		xstate_size = sizeof(struct swregs_state);
+		fpu_kernel_xstate_size = sizeof(struct swregs_state);
 	} else {
 		if (boot_cpu_has(X86_FEATURE_FXSR))
-			xstate_size = sizeof(struct fxregs_state);
+			fpu_kernel_xstate_size =
+				sizeof(struct fxregs_state);
 		else
-			xstate_size = sizeof(struct fregs_state);
+			fpu_kernel_xstate_size =
+				sizeof(struct fregs_state);
 	}
 
-	fpu_user_xstate_size = xstate_size;
+	fpu_user_xstate_size = fpu_kernel_xstate_size;
 
 	/*
 	 * Quirk: we don't yet handle the XSAVES* instructions
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 0d29d4d..06d80f6 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -263,7 +263,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	int ia32_fxstate = (buf != buf_fx);
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;
-	int state_size = xstate_size;
+	int state_size = fpu_kernel_xstate_size;
 	u64 xfeatures = 0;
 	int fx_only = 0;
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9c4da35..46abfaf 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -537,7 +537,7 @@ static void do_extra_xstate_size_checks(void)
 		 */
 		paranoid_xstate_size += xfeature_size(i);
 	}
-	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
+	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
 }
 
 
@@ -616,7 +616,7 @@ static int init_xstate_size(void)
 	 * The size is OK, we are definitely going to use xsave,
 	 * make it known to the world that we need more space.
 	 */
-	xstate_size = possible_xstate_size;
+	fpu_kernel_xstate_size = possible_xstate_size;
 	do_extra_xstate_size_checks();
 
 	/*
@@ -679,14 +679,14 @@ void __init fpu__init_system_xstate(void)
 		return;
 	}
 
-	update_regset_xstate_info(xstate_size, xfeatures_mask);
+	update_regset_xstate_info(fpu_kernel_xstate_size, xfeatures_mask);
 	fpu__init_prepare_fx_sw_frame();
 	setup_init_fpu_buf();
 	setup_xstate_comp();
 
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask,
-		xstate_size,
+		fpu_kernel_xstate_size,
 		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 }
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/fpu] x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization
  2016-05-20 17:47 ` [PATCH 3/4] x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization Yu-cheng Yu
  2016-06-17 12:21   ` [tip:x86/fpu] " tip-bot for Fenghua Yu
@ 2016-06-18  8:13   ` tip-bot for Fenghua Yu
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Fenghua Yu @ 2016-06-18  8:13 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: brgerst, sai.praneeth.prakhya, luto, tglx, oleg, torvalds,
	fenghua.yu, quentin.casasnovas, yu-cheng.yu, dave.hansen, hpa,
	peterz, mingo, bp, ravi.v.shankar, dvlasenk, linux-kernel,
	dave.hansen

Commit-ID:  7d9370607d28afd454775c623d5447603473a3c3
Gitweb:     http://git.kernel.org/tip/7d9370607d28afd454775c623d5447603473a3c3
Author:     Fenghua Yu <fenghua.yu@intel.com>
AuthorDate: Fri, 20 May 2016 10:47:07 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 18 Jun 2016 10:10:19 +0200

x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization

Keep init_fpstate.xsave.header.xfeatures as zero for init optimization.
This is important for init optimization that is implemented in processor.
If a bit corresponding to an xstate in xstate_bv is 0, it means the
xstate is in init status and will not be read from memory to the processor
during XRSTOR/XRSTORS instruction. This largely impacts context switch
performance.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2fb4ec7f18b76e8cda057a8c0038def74a9b8044.1463760376.git.yu-cheng.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 46abfaf..dbfef1b 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -329,13 +329,11 @@ static void __init setup_init_fpu_buf(void)
 	setup_xstate_features();
 	print_xstate_features();
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
-		init_fpstate.xsave.header.xfeatures = xfeatures_mask;
-	}
 
 	/*
-	 * Init all the features state with header_bv being 0x0
+	 * Init all the features state with header.xfeatures being 0x0
 	 */
 	copy_kernel_to_xregs_booting(&init_fpstate.xsave);
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:x86/fpu] x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use
  2016-05-20 17:47 ` [PATCH 4/4] x86/fpu/xstate: Copy xstate registers directly to signal frame when compacted format is in use Yu-cheng Yu
  2016-06-17 12:21   ` [tip:x86/fpu] x86/fpu/xstate: Copy xstate registers directly to the " tip-bot for Yu-cheng Yu
@ 2016-06-18  8:14   ` tip-bot for Yu-cheng Yu
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Yu-cheng Yu @ 2016-06-18  8:14 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: quentin.casasnovas, sai.praneeth.prakhya, yu-cheng.yu,
	fenghua.yu, tglx, hpa, dvlasenk, torvalds, oleg, bp, dave.hansen,
	mingo, peterz, linux-kernel, dave.hansen, luto, brgerst,
	ravi.v.shankar

Commit-ID:  99aa22d0d8f70d9317727ab40c85b2ead740a6ca
Gitweb:     http://git.kernel.org/tip/99aa22d0d8f70d9317727ab40c85b2ead740a6ca
Author:     Yu-cheng Yu <yu-cheng.yu@intel.com>
AuthorDate: Fri, 20 May 2016 10:47:08 -0700
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 18 Jun 2016 10:10:19 +0200

x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use

XSAVES is a kernel instruction and uses a compacted format. When working
with user space, the kernel should provide standard-format, non-supervisor
state data. We cannot do __copy_to_user() from a compacted-format kernel
xstate area to a signal frame.

Dave Hansen proposes this method to simplify copy xstate directly to user.

This patch is based on an earlier patch from Fenghua Yu <fenghua.yu@intel.com>

Originally-from: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c36f419d525517d04209a28dd8e1e5af9000036e.1463760376.git.yu-cheng.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h | 1 +
 arch/x86/kernel/fpu/signal.c      | 3 ++-
 arch/x86/kernel/fpu/xstate.c      | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 16df2c4..d812cf3 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -47,5 +47,6 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
+int using_compacted_format(void);
 
 #endif
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 06d80f6..8aa96cb 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -8,6 +8,7 @@
 #include <asm/fpu/internal.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
+#include <asm/fpu/xstate.h>
 
 #include <asm/sigframe.h>
 #include <asm/trace/fpu.h>
@@ -169,7 +170,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpregs_active()) {
+	if (fpregs_active() || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index dbfef1b..0b01f00 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -420,7 +420,7 @@ static int xfeature_size(int xfeature_nr)
  * that it is obvious which aspect of 'XSAVES' is being handled
  * by the calling code.
  */
-static int using_compacted_format(void)
+int using_compacted_format(void)
 {
 	return boot_cpu_has(X86_FEATURE_XSAVES);
 }

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-06-18  8:15 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-20 17:47 [PATCH 0/4] x86/fpu/state: Fix XSAVES issues - Part 1 Yu-cheng Yu
2016-05-20 17:47 ` [PATCH 1/4] x86/fpu/xstate: Define and use fpu_user_xstate_size Yu-cheng Yu
2016-06-17 12:20   ` [tip:x86/fpu] x86/fpu/xstate: Define and use 'fpu_user_xstate_size' tip-bot for Fenghua Yu
2016-06-18  8:12   ` tip-bot for Fenghua Yu
2016-05-20 17:47 ` [PATCH 2/4] x86/fpu/xstate: Rename xstate_size to fpu_kernel_xstate_size to distinguish from fpu_user_xstate_size Yu-cheng Yu
2016-06-17 12:20   ` [tip:x86/fpu] x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size' tip-bot for Fenghua Yu
2016-06-18  8:13   ` tip-bot for Fenghua Yu
2016-05-20 17:47 ` [PATCH 3/4] x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization Yu-cheng Yu
2016-06-17 12:21   ` [tip:x86/fpu] " tip-bot for Fenghua Yu
2016-06-18  8:13   ` tip-bot for Fenghua Yu
2016-05-20 17:47 ` [PATCH 4/4] x86/fpu/xstate: Copy xstate registers directly to signal frame when compacted format is in use Yu-cheng Yu
2016-06-17 12:21   ` [tip:x86/fpu] x86/fpu/xstate: Copy xstate registers directly to the " tip-bot for Yu-cheng Yu
2016-06-18  8:14   ` tip-bot for Yu-cheng Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).