linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hans Rosenfeld <hans.rosenfeld@amd.com>
To: <hpa@zytor.com>
Cc: <brgerst@gmail.com>, <tglx@linutronix.de>, <mingo@elte.hu>,
	<suresh.b.siddha@intel.com>, <eranian@google.com>,
	<robert.richter@amd.com>, <Andreas.Herrmann3@amd.com>,
	<x86@kernel.org>, <linux-kernel@vger.kernel.org>,
	Hans Rosenfeld <hans.rosenfeld@amd.com>
Subject: [RFC v3 8/8] x86, xsave: remove lazy allocation of xstate area
Date: Tue, 5 Apr 2011 17:50:56 +0200	[thread overview]
Message-ID: <1302018656-586370-9-git-send-email-hans.rosenfeld@amd.com> (raw)
In-Reply-To: <4D91FA76.1010908@zytor.com>

This patch completely removes lazy allocation of the xstate area. All
user tasks will always have an xstate area preallocated, just like they
already do when non-lazy features are present. The size of the xsave
area ranges from 112 to 960 bytes, depending on the xstates present and
enabled. Since it is common to use SSE etc. for optimization, the actual
overhead is expected to be negligible.

This removes some of the special-case handling of non-lazy xstates. It
also greatly simplifies init_fpu() by removing the allocation code, the
check for the presence of the xstate area, and the init_fpu() return value.

Signed-off-by: Hans Rosenfeld <hans.rosenfeld@amd.com>
---
 arch/x86/include/asm/i387.h   |   20 ++++++++------------
 arch/x86/kernel/i387.c        |   41 +++++++++--------------------------------
 arch/x86/kernel/traps.c       |   16 ++--------------
 arch/x86/kernel/xsave.c       |    8 ++------
 arch/x86/kvm/x86.c            |    4 ++--
 arch/x86/math-emu/fpu_entry.c |    8 ++------
 6 files changed, 25 insertions(+), 72 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index efe1476..989c0ac 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -40,7 +40,7 @@
 extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
-extern int init_fpu(struct task_struct *child);
+extern void init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
@@ -333,18 +333,14 @@ static union thread_xstate __init_xstate, *init_xstate = &__init_xstate;
 
 static inline void fpu_clear(struct fpu *fpu)
 {
-	if (pcntxt_mask & XCNTXT_NONLAZY) {
-		if (!fpu_allocated(fpu)) {
-			BUG_ON(init_xstate == NULL);
-			fpu->state = init_xstate;
-			init_xstate = NULL;
-		}
-		memset(fpu->state, 0, xstate_size);
-		fpu_finit(fpu);
-		set_used_math();
-	} else {
-		fpu_free(fpu);
+	if (!fpu_allocated(fpu)) {
+		BUG_ON(init_xstate == NULL);
+		fpu->state = init_xstate;
+		init_xstate = NULL;
 	}
+	memset(fpu->state, 0, xstate_size);
+	fpu_finit(fpu);
+	set_used_math();
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index dd9644a..df0b139 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -127,9 +127,9 @@ EXPORT_SYMBOL_GPL(fpu_finit);
  * value at reset if we support XMM instructions and then
  * remember the current task has used the FPU.
  */
-int init_fpu(struct task_struct *tsk)
+void init_fpu(struct task_struct *tsk)
 {
-	int ret;
+	BUG_ON(tsk->flags & PF_KTHREAD);
 
 	if (tsk_used_math(tsk)) {
 		if (HAVE_HWFP && tsk == current) {
@@ -137,20 +137,12 @@ int init_fpu(struct task_struct *tsk)
 			save_xstates(tsk);
 			preempt_enable();
 		}
-		return 0;
+		return;
 	}
 
-	/*
-	 * Memory allocation at the first usage of the FPU and other state.
-	 */
-	ret = fpu_alloc(&tsk->thread.fpu);
-	if (ret)
-		return ret;
-
 	fpu_finit(&tsk->thread.fpu);
 
 	set_stopped_child_used_math(tsk);
-	return 0;
 }
 EXPORT_SYMBOL_GPL(init_fpu);
 
@@ -173,14 +165,10 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 		unsigned int pos, unsigned int count,
 		void *kbuf, void __user *ubuf)
 {
-	int ret;
-
 	if (!cpu_has_fxsr)
 		return -ENODEV;
 
-	ret = init_fpu(target);
-	if (ret)
-		return ret;
+	init_fpu(target);
 
 	if (use_xsaveopt())
 		sanitize_i387_state(target);
@@ -198,9 +186,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!cpu_has_fxsr)
 		return -ENODEV;
 
-	ret = init_fpu(target);
-	if (ret)
-		return ret;
+	init_fpu(target);
 
 	if (use_xsaveopt())
 		sanitize_i387_state(target);
@@ -232,9 +218,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (!cpu_has_xsave)
 		return -ENODEV;
 
-	ret = init_fpu(target);
-	if (ret)
-		return ret;
+	init_fpu(target);
 
 	/*
 	 * Copy the 48bytes defined by the software first into the xstate
@@ -262,9 +246,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!cpu_has_xsave)
 		return -ENODEV;
 
-	ret = init_fpu(target);
-	if (ret)
-		return ret;
+	init_fpu(target);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				 &target->thread.fpu.state->xsave, 0, -1);
@@ -427,11 +409,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	       void *kbuf, void __user *ubuf)
 {
 	struct user_i387_ia32_struct env;
-	int ret;
 
-	ret = init_fpu(target);
-	if (ret)
-		return ret;
+	init_fpu(target);
 
 	if (!HAVE_HWFP)
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -462,9 +441,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	struct user_i387_ia32_struct env;
 	int ret;
 
-	ret = init_fpu(target);
-	if (ret)
-		return ret;
+	init_fpu(target);
 
 	if (use_xsaveopt())
 		sanitize_i387_state(target);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 872fc78..c8fbd04 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -734,20 +734,8 @@ asmlinkage void math_state_restore(void)
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
 
-	if (!tsk_used_math(tsk)) {
-		local_irq_enable();
-		/*
-		 * does a slab alloc which can sleep
-		 */
-		if (init_fpu(tsk)) {
-			/*
-			 * ran out of memory!
-			 */
-			do_group_exit(SIGKILL);
-			return;
-		}
-		local_irq_disable();
-	}
+	if (!tsk_used_math(tsk))
+		init_fpu(tsk);
 
 	restore_xstates(tsk, XCNTXT_LAZY);
 }
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a188362..62f2df8 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -264,7 +264,6 @@ int restore_xstates_sigframe(void __user *buf, unsigned int size)
 	struct _fpstate_ia32 __user *fp = buf;
 	struct xsave_struct *xsave;
 	u64 xstate_mask = pcntxt_mask & XCNTXT_NONLAZY;
-	int err;
 
 	if (!buf) {
 		if (used_math()) {
@@ -277,11 +276,8 @@ int restore_xstates_sigframe(void __user *buf, unsigned int size)
 	if (!access_ok(VERIFY_READ, buf, size))
 		return -EACCES;
 
-	if (!used_math()) {
-		err = init_fpu(tsk);
-		if (err)
-			return err;
-	}
+	if (!used_math())
+		init_fpu(tsk);
 
 	if (!HAVE_HWFP) {
 		set_used_math();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bc04e15..17e52a9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5386,8 +5386,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 	sigset_t sigsaved;
 
-	if (!tsk_used_math(current) && init_fpu(current))
-		return -ENOMEM;
+	if (!tsk_used_math(current))
+		init_fpu(current);
 
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 7718541..472e2b9 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -147,12 +147,8 @@ void math_emulate(struct math_emu_info *info)
 	unsigned long code_limit = 0;	/* Initialized to stop compiler warnings */
 	struct desc_struct code_descriptor;
 
-	if (!used_math()) {
-		if (init_fpu(current)) {
-			do_group_exit(SIGKILL);
-			return;
-		}
-	}
+	if (!used_math())
+		init_fpu(current);
 
 #ifdef RE_ENTRANT_CHECKING
 	if (emulating) {
-- 
1.5.6.5



  parent reply	other threads:[~2011-04-05 15:52 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-03-09 19:14 [RFC 0/8] rework of extended state handling, LWP support Hans Rosenfeld
2011-03-09 19:14 ` [RFC 1/8] x86, xsave: cleanup fpu/xsave support Hans Rosenfeld
2011-03-09 19:14 ` [RFC 2/8] x86, xsave: rework " Hans Rosenfeld
2011-03-09 19:14 ` [RFC 3/8] x86, xsave: cleanup fpu/xsave signal frame setup Hans Rosenfeld
2011-03-09 19:14 ` [RFC 4/8] x86, xsave: remove unused code Hans Rosenfeld
2011-03-09 19:14 ` [RFC 5/8] x86, xsave: more cleanups Hans Rosenfeld
2011-03-09 19:15 ` [RFC 6/8] x86, xsave: add support for non-lazy xstates Hans Rosenfeld
2011-03-09 19:15 ` [RFC 7/8] x86, xsave: add kernel support for AMDs Lightweight Profiling (LWP) Hans Rosenfeld
2011-03-09 19:15 ` [RFC 8/8] x86, xsave: remove lazy allocation of xstate area Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 0/8] x86, xsave: rework of extended state handling, LWP support Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 1/8] x86, xsave: cleanup fpu/xsave support Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 2/8] x86, xsave: rework " Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 3/8] x86, xsave: cleanup fpu/xsave signal frame setup Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 4/8] x86, xsave: remove unused code Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 5/8] x86, xsave: more cleanups Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 6/8] x86, xsave: add support for non-lazy xstates Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 7/8] x86, xsave: add kernel support for AMDs Lightweight Profiling (LWP) Hans Rosenfeld
2011-03-23 15:27   ` [RFC v2 8/8] x86, xsave: remove lazy allocation of xstate area Hans Rosenfeld
2011-03-24 11:39     ` Brian Gerst
2011-03-29 14:17       ` Hans Rosenfeld
2011-03-29 15:27         ` H. Peter Anvin
2011-03-30 13:11           ` Hans Rosenfeld
2011-04-05 15:50           ` [RFC v3 0/8] x86, xsave: rework of extended state handling, LWP support Hans Rosenfeld
2011-04-07  7:23             ` Ingo Molnar
2011-04-07 15:30               ` Hans Rosenfeld
2011-04-07 16:08                 ` [RFC v4 6/8] x86, xsave: add support for non-lazy xstates Hans Rosenfeld
2011-04-07 16:08                 ` [RFC v4 8/8] x86, xsave: remove lazy allocation of xstate area Hans Rosenfeld
2011-04-13 10:58                 ` [PATCH] x86, xsave: fix non-lazy allocation of the xsave area Hans Rosenfeld
2011-04-13 23:21                   ` H. Peter Anvin
2011-04-15 16:47                     ` [PATCH 1/1] " Hans Rosenfeld
2011-05-16 19:10               ` [RFC v3 0/8] x86, xsave: rework of extended state handling, LWP support Hans Rosenfeld
2011-05-17 11:30                 ` Ingo Molnar
2011-05-17 15:22                   ` Hans Rosenfeld
2011-05-18 11:22                     ` Ingo Molnar
2011-05-18 13:51                     ` Ingo Molnar
2011-05-18  8:16                   ` Joerg Roedel
2011-05-18 10:59                     ` Ingo Molnar
2011-05-18 18:02                   ` Andreas Herrmann
2011-04-05 15:50           ` [RFC v3 1/8] x86, xsave: cleanup fpu/xsave support Hans Rosenfeld
2011-04-05 15:50           ` [RFC v3 2/8] x86, xsave: rework " Hans Rosenfeld
2011-04-05 15:50           ` [RFC v3 3/8] x86, xsave: cleanup fpu/xsave signal frame setup Hans Rosenfeld
2011-04-05 15:50           ` [RFC v3 4/8] x86, xsave: remove unused code Hans Rosenfeld
2011-04-05 15:50           ` [RFC v3 5/8] x86, xsave: more cleanups Hans Rosenfeld
2011-04-05 15:50           ` [RFC v3 6/8] x86, xsave: add support for non-lazy xstates Hans Rosenfeld
2011-04-05 15:50           ` [RFC v3 7/8] x86, xsave: add kernel support for AMDs Lightweight Profiling (LWP) Hans Rosenfeld
2011-04-06 22:06             ` [tip:x86/xsave] " tip-bot for Hans Rosenfeld
2011-04-05 15:50           ` Hans Rosenfeld [this message]
2011-04-06 22:06             ` [tip:x86/xsave] x86, xsave: remove lazy allocation of xstate area tip-bot for Hans Rosenfeld

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1302018656-586370-9-git-send-email-hans.rosenfeld@amd.com \
    --to=hans.rosenfeld@amd.com \
    --cc=Andreas.Herrmann3@amd.com \
    --cc=brgerst@gmail.com \
    --cc=eranian@google.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=robert.richter@amd.com \
    --cc=suresh.b.siddha@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).