From: riel@redhat.com
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, tglx@linutronix.de, pbonzini@redhat.com,
	mingo@redhat.com, luto@kernel.org, hpa@zytor.com,
	dave.hansen@linux.intel.com, bp@suse.de
Subject: [PATCH RFC 1/5] x86,fpu: split prev/next task fpu state handling
Date: Sat,  1 Oct 2016 16:31:31 -0400
Message-ID: <1475353895-22175-2-git-send-email-riel@redhat.com>
In-Reply-To: <1475353895-22175-1-git-send-email-riel@redhat.com>

From: Rik van Riel <riel@redhat.com>

Move all handling of the next task's FPU state into
switch_fpu_finish(), in preparation for switching
FPU states more lazily later in the series.

The CR0.TS state is mirrored in a per-cpu variable instead of
being passed around in a local variable, because keeping it in
a local variable will no longer be possible later in the series.
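
For context: writes to CR0 are privileged and comparatively expensive,
so the patch caches the current CR0.TS setting in a per-cpu bool and
writes CR0 only on a transition. Below is a minimal sketch of that
caching pattern, assuming the kernel's clts()/stts() helpers; the
fpu_hw_*() wrapper names are illustrative, not part of the patch:

#include <linux/percpu.h>	/* DEFINE_PER_CPU, __this_cpu_*() */
#include <asm/fpu/internal.h>	/* stts(); pulls in clts() -- header
				   location approximate for this era */

/* Mirrors whether CR0.TS is currently clear on this CPU. */
DEFINE_PER_CPU(bool, fpu_active) = false;

/* Callers must have preemption disabled, as __switch_to() does. */
static inline void fpu_hw_activate(void)
{
	/* Make the FPU usable; write CR0 only if TS is currently set. */
	if (!__this_cpu_read(fpu_active)) {
		clts();			/* clear CR0.TS */
		__this_cpu_write(fpu_active, true);
	}
}

static inline void fpu_hw_deactivate(void)
{
	/* Re-arm the #NM trap; write CR0 only if TS is currently clear. */
	if (__this_cpu_read(fpu_active)) {
		stts();			/* set CR0.TS */
		__this_cpu_write(fpu_active, false);
	}
}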

Signed-off-by: Rik van Riel <riel@redhat.com>
---
 arch/x86/include/asm/fpu/internal.h | 78 ++++++++++++++++---------------------
 arch/x86/kernel/fpu/core.c          |  1 +
 arch/x86/kernel/process_32.c        |  5 +--
 arch/x86/kernel/process_64.c        |  5 +--
 4 files changed, 39 insertions(+), 50 deletions(-)
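
To make the two-stage contract above concrete, here is a condensed
sketch of the FPU portion of __switch_to() after this patch; the
segment, TLS, and stack switching between the two calls is elided,
and the surrounding shape is assumed rather than copied from the tree:

	struct fpu *prev_fpu = &prev_p->thread.fpu;
	struct fpu *next_fpu = &next_p->thread.fpu;
	int cpu = smp_processor_id();

	/* Stage 1, still in the old task's context: save its registers. */
	switch_fpu_prepare(prev_fpu, cpu);

	/* ... switch segment registers, TLS, kernel stack ... */

	/* Stage 2: maybe preload next_fpu's state; flip CR0.TS as needed. */
	switch_fpu_finish(next_fpu);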

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2737366ea583..79e1cee9f3b0 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -482,6 +482,7 @@ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size)
  */
 
 DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+DECLARE_PER_CPU(bool, fpu_active);
 
 /*
  * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx,
@@ -577,28 +578,15 @@ static inline void fpregs_deactivate(struct fpu *fpu)
  *
  * This is a two-stage process:
  *
- *  - switch_fpu_prepare() saves the old state and
- *    sets the new state of the CR0.TS bit. This is
- *    done within the context of the old process.
+ *  - switch_fpu_prepare() saves the old state.
+ *    This is done within the context of the old process.
  *
- *  - switch_fpu_finish() restores the new state as
- *    necessary.
+ *  - switch_fpu_finish() restores the new state
+ *    and flips CR0.TS as necessary.
  */
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t
-switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
+static inline void
+switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	fpu_switch_t fpu;
-
-	/*
-	 * If the task has used the math, pre-load the FPU on xsave processors
-	 * or if the past 5 consecutive context-switches used math.
-	 */
-	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
-		      new_fpu->fpstate_active &&
-		      (use_eager_fpu() || new_fpu->counter > 5);
-
 	if (old_fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
@@ -608,29 +596,10 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 		/* But leave fpu_fpregs_owner_ctx! */
 		old_fpu->fpregs_active = 0;
 		trace_x86_fpu_regs_deactivated(old_fpu);
-
-		/* Don't change CR0.TS if we just switch! */
-		if (fpu.preload) {
-			new_fpu->counter++;
-			__fpregs_activate(new_fpu);
-			trace_x86_fpu_regs_activated(new_fpu);
-			prefetch(&new_fpu->state);
-		} else {
-			__fpregs_deactivate_hw();
-		}
 	} else {
 		old_fpu->counter = 0;
 		old_fpu->last_cpu = -1;
-		if (fpu.preload) {
-			new_fpu->counter++;
-			if (fpu_want_lazy_restore(new_fpu, cpu))
-				fpu.preload = 0;
-			else
-				prefetch(&new_fpu->state);
-			fpregs_activate(new_fpu);
-		}
 	}
-	return fpu;
 }
 
 /*
@@ -638,15 +607,36 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
  */
 
 /*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
+ * Set up the userspace FPU context for the new task.
  */
-static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
+static inline void switch_fpu_finish(struct fpu *new_fpu)
 {
-	if (fpu_switch.preload)
+	bool preload;
+	/*
+	 * If the task has used the math, pre-load the FPU on xsave processors
+	 * or if the past 5 consecutive context-switches used math.
+	 */
+	preload = static_cpu_has(X86_FEATURE_FPU) &&
+		  new_fpu->fpstate_active &&
+		  (use_eager_fpu() || new_fpu->counter > 5);
+
+	if (preload) {
+		prefetch(&new_fpu->state);
+		new_fpu->counter++;
+		__fpregs_activate(new_fpu);
+		trace_x86_fpu_regs_activated(new_fpu);
+
+		/* Don't change CR0.TS if we just switch! */
+		if (!__this_cpu_read(fpu_active)) {
+			__fpregs_activate_hw();
+			__this_cpu_write(fpu_active, true);
+		}
+
 		copy_kernel_to_fpregs(&new_fpu->state);
+	} else if (__this_cpu_read(fpu_active)) {
+		__this_cpu_write(fpu_active, false);
+		__fpregs_deactivate_hw();
+	}
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 3fc03a09a93b..82cd46584528 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -39,6 +39,7 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
  * Track which context is using the FPU on the CPU:
  */
 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+DEFINE_PER_CPU(bool, fpu_active) = false;
 
 static void kernel_fpu_disable(void)
 {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29c38c7..8cd2f42190dc 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -247,11 +247,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-	fpu_switch_t fpu_switch;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
+	switch_fpu_prepare(prev_fpu, cpu);
 
 	/*
 	 * Save away %gs. No need to save %fs, as it was saved on the
@@ -310,7 +309,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);
 
-	switch_fpu_finish(next_fpu, fpu_switch);
+	switch_fpu_finish(next_fpu);
 
 	this_cpu_write(current_task, next_p);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8f84bf..92b9485a6a18 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -264,9 +264,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	unsigned prev_fsindex, prev_gsindex;
-	fpu_switch_t fpu_switch;
 
-	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
+	switch_fpu_prepare(prev_fpu, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
 	 * %fs and %gs may be cleared by load_TLS().
@@ -416,7 +415,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		prev->gsbase = 0;
 	prev->gsindex = prev_gsindex;
 
-	switch_fpu_finish(next_fpu, fpu_switch);
+	switch_fpu_finish(next_fpu);
 
 	/*
 	 * Switch the PDA and FPU contexts.
-- 
2.7.4
