All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Andy Lutomirski <luto@amacapital.net>,
	Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Fenghua Yu <fenghua.yu@intel.com>,
	"H . Peter Anvin" <hpa@zytor.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Oleg Nesterov <oleg@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Rik van Riel <riel@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Yu-cheng Yu <yu-cheng.yu@intel.com>
Subject: [PATCH 1/7] x86/fpu: Simplify the fpu->last_cpu logic and rename it to fpu->fpregs_cached
Date: Thu, 26 Jan 2017 12:26:23 +0100	[thread overview]
Message-ID: <1485429989-23340-2-git-send-email-mingo@kernel.org> (raw)
In-Reply-To: <1485429989-23340-1-git-send-email-mingo@kernel.org>

fpu->last_cpu records the last CPU a given FPU context structure was used on.
This enables an important optimization: if a task schedules out to a kernel
thread and then gets scheduled back after only FPU-inactive kernel threads
have executed, the FPU state in the registers is still intact and the FPU
restore can be skipped - speeding up the context switch.

The same logic can be implemented slightly more simply, by using a single
boolean flag: fpu->fpregs_cached tells us whether the context's FPU registers
are cached in the CPU.

The only difference is that this flag has to be invalidated when a task is
migrated away from its CPU - but that is a slow path compared to context
switches.

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 15 ++++++++-------
 arch/x86/include/asm/fpu/types.h    | 24 ++++++++++--------------
 arch/x86/include/asm/switch_to.h    | 10 ++++++++++
 arch/x86/kernel/fpu/core.c          |  2 +-
 kernel/sched/core.c                 |  2 ++
 kernel/sched/sched.h                |  8 ++++++++
 6 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 255645f60ca2..2eaf93cf11cc 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -490,7 +490,7 @@ DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
 /*
  * The in-register FPU state for an FPU context on a CPU is assumed to be
- * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * valid if fpu->fpregs_cached is still set, and if the fpu_fpregs_owner_ctx
  * matches the FPU.
  *
  * If the FPU register state is valid, the kernel can skip restoring the
@@ -512,12 +512,12 @@ static inline void __cpu_invalidate_fpregs_state(void)
 
 static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
 {
-	fpu->last_cpu = -1;
+	fpu->fpregs_cached = 0;
 }
 
 static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
 {
-	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && fpu->fpregs_cached;
 }
 
 /*
@@ -573,15 +573,16 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
 	if (old_fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
-			old_fpu->last_cpu = -1;
+			old_fpu->fpregs_cached = 0;
 		else
-			old_fpu->last_cpu = cpu;
+			old_fpu->fpregs_cached = 1;
 
 		/* But leave fpu_fpregs_owner_ctx! */
 		old_fpu->fpregs_active = 0;
 		trace_x86_fpu_regs_deactivated(old_fpu);
-	} else
-		old_fpu->last_cpu = -1;
+	} else {
+		old_fpu->fpregs_cached = 0;
+	}
 }
 
 /*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b9c09d..3090b0d7b232 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -276,20 +276,6 @@ union fpregs_state {
  */
 struct fpu {
 	/*
-	 * @last_cpu:
-	 *
-	 * Records the last CPU on which this context was loaded into
-	 * FPU registers. (In the lazy-restore case we might be
-	 * able to reuse FPU registers across multiple context switches
-	 * this way, if no intermediate task used the FPU.)
-	 *
-	 * A value of -1 is used to indicate that the FPU state in context
-	 * memory is newer than the FPU state in registers, and that the
-	 * FPU state should be reloaded next time the task is run.
-	 */
-	unsigned int			last_cpu;
-
-	/*
 	 * @fpstate_active:
 	 *
 	 * This flag indicates whether this context is active: if the task
@@ -322,6 +308,16 @@ struct fpu {
 	unsigned char			fpregs_active;
 
 	/*
+	 * @fpregs_cached:
+	 *
+	 * This flag tells us whether this context is loaded into a CPU
+	 * right now.
+	 *
+	 * This is set to 0 if a task is migrated to another CPU.
+	 */
+	unsigned char			fpregs_cached;
+
+	/*
 	 * @state:
 	 *
 	 * In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index fcc5cd387fd1..a7146dadb31d 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -72,4 +72,14 @@ do {									\
 	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
 
+
+/*
+ * The task-migration arch callback clears the FPU registers cache:
+ */
+static inline void arch_task_migrate(struct task_struct *p)
+{
+	p->thread.fpu.fpregs_cached = 0;
+}
+#define arch_task_migrate arch_task_migrate
+
 #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f070c2d..287f1cb32b59 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
 	dst_fpu->fpregs_active = 0;
-	dst_fpu->last_cpu = -1;
+	dst_fpu->fpregs_cached = 0;
 
 	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c56fb57f2991..7eb2f3041fde 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1253,6 +1253,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 			p->sched_class->migrate_task_rq(p);
 		p->se.nr_migrations++;
 		perf_event_task_migrate(p);
+
+		arch_task_migrate(p);
 	}
 
 	__set_task_cpu(p, new_cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7b34c7826ca5..ff8a894132e4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1824,3 +1824,11 @@ static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #else /* arch_scale_freq_capacity */
 #define arch_scale_freq_invariant()	(false)
 #endif
+
+/*
+ * Default task-migration arch callback:
+ */
+#ifndef arch_task_migrate
+static inline void arch_task_migrate(struct task_struct *p) { }
+#endif
+
-- 
2.7.4

  reply	other threads:[~2017-01-26 11:26 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-26 11:26 [PATCH 0/7] x86/fpu: Simplify the FPU state machine Ingo Molnar
2017-01-26 11:26 ` Ingo Molnar [this message]
2017-01-26 14:23   ` [PATCH 1/7] x86/fpu: Simplify the fpu->last_cpu logic and rename it to fpu->fpregs_cached Rik van Riel
2017-01-26 14:53     ` Ingo Molnar
2017-01-26 15:05       ` [PATCH] x86/fpu: Unify the naming of the FPU register cache validity flags Ingo Molnar
2017-01-26 15:31         ` Peter Zijlstra
2017-01-26 14:54   ` [PATCH 1/7] x86/fpu: Simplify the fpu->last_cpu logic and rename it to fpu->fpregs_cached Rik van Riel
2017-01-26 15:09     ` Ingo Molnar
2017-01-26 16:51     ` Andy Lutomirski
2017-01-26 11:26 ` [PATCH 2/7] x86/fpu: Simplify fpu->fpregs_active use Ingo Molnar
2017-01-26 16:30   ` Andy Lutomirski
2017-01-26 11:26 ` [PATCH 3/7] x86/fpu: Make the fpu state change in fpu__clear() scheduler-atomic Ingo Molnar
2017-01-26 11:26 ` [PATCH 4/7] x86/fpu: Split the state handling in fpu__drop() Ingo Molnar
2017-01-26 11:26 ` [PATCH 5/7] x86/fpu: Change fpu->fpregs_active users to fpu->fpstate_active Ingo Molnar
2017-01-26 14:44   ` Rik van Riel
2017-01-26 15:16     ` Ingo Molnar
2017-01-26 15:45       ` Rik van Riel
2017-01-26 15:53         ` Ingo Molnar
2017-01-26 17:00           ` Andy Lutomirski
2017-01-26 18:04             ` Rik van Riel
2017-01-26 11:26 ` [PATCH 6/7] x86/fpu: Decouple fpregs_activate()/fpregs_deactivate() from fpu->fpregs_active Ingo Molnar
2017-01-26 11:26 ` [PATCH 7/7] x86/fpu: Remove struct fpu::fpregs_active Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1485429989-23340-2-git-send-email-mingo@kernel.org \
    --to=mingo@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=yu-cheng.yu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.