From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752532AbdAZL0s (ORCPT );
	Thu, 26 Jan 2017 06:26:48 -0500
Received: from mail-wm0-f68.google.com ([74.125.82.68]:34834 "EHLO
	mail-wm0-f68.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1752351AbdAZL0o (ORCPT );
	Thu, 26 Jan 2017 06:26:44 -0500
From: Ingo Molnar <mingo@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton, Andy Lutomirski, Borislav Petkov, Dave Hansen,
	Fenghua Yu, "H. Peter Anvin", Linus Torvalds, Oleg Nesterov,
	Peter Zijlstra, Rik van Riel, Thomas Gleixner, Yu-cheng Yu
Subject: [PATCH 1/7] x86/fpu: Simplify the fpu->last_cpu logic and rename it to fpu->fpregs_cached
Date: Thu, 26 Jan 2017 12:26:23 +0100
Message-Id: <1485429989-23340-2-git-send-email-mingo@kernel.org>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1485429989-23340-1-git-send-email-mingo@kernel.org>
References: <1485429989-23340-1-git-send-email-mingo@kernel.org>
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org

fpu->last_cpu records the last CPU a given FPU context structure was
used on. This enables an important optimization: if a task schedules
out to a kernel thread and then gets scheduled back in after only
FPU-inactive kernel threads executed, the FPU state in the registers
is still intact and the FPU restore can be skipped - speeding up the
context switch.

The same logic can be implemented in a slightly simpler way, using a
single boolean flag: fpu->fpregs_cached tells us whether the context's
FPU registers are cached in the CPU.

The only difference is that this flag has to be invalidated when a
task is migrated away from its CPU - but that is a slow path compared
to context switches.

Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: Fenghua Yu
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Oleg Nesterov
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Cc: Yu-cheng Yu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 15 ++++++++-------
 arch/x86/include/asm/fpu/types.h    | 24 ++++++++++--------------
 arch/x86/include/asm/switch_to.h    | 10 ++++++++++
 arch/x86/kernel/fpu/core.c          |  2 +-
 kernel/sched/core.c                 |  2 ++
 kernel/sched/sched.h                |  8 ++++++++
 6 files changed, 39 insertions(+), 22 deletions(-)
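
[ Note: as an illustration of the new invariant, here is a minimal
  user-space sketch of the check. The per-CPU owner pointer is reduced
  to a plain global, this_cpu_read_stable() is dropped, and
  on_task_migrate() is a hypothetical stand-in for the migration
  callback wired up below - none of this is kernel code:

	#include <stdbool.h>

	/* Simplified stand-in for the kernel's struct fpu: */
	struct fpu {
		bool fpregs_cached;	/* replaces 'unsigned int last_cpu' */
	};

	/* Per-CPU owner pointer, reduced to a single global here: */
	static struct fpu *fpu_fpregs_owner_ctx;

	/*
	 * The registers cached in the CPU are usable only if this
	 * context still owns them and nothing invalidated them since
	 * they were loaded - the 'cpu == fpu->last_cpu' comparison
	 * is folded into the single boolean:
	 */
	static bool fpregs_state_valid(struct fpu *fpu)
	{
		return fpu == fpu_fpregs_owner_ctx && fpu->fpregs_cached;
	}

	/* Migration is the one new invalidation point this patch adds: */
	static void on_task_migrate(struct fpu *fpu)
	{
		fpu->fpregs_cached = false;
	}
  ]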
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 255645f60ca2..2eaf93cf11cc 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -490,7 +490,7 @@ DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
 /*
  * The in-register FPU state for an FPU context on a CPU is assumed to be
- * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * valid if fpu->fpregs_cached is still set, and if the fpu_fpregs_owner_ctx
  * matches the FPU.
 *
 * If the FPU register state is valid, the kernel can skip restoring the
@@ -512,12 +512,12 @@ static inline void __cpu_invalidate_fpregs_state(void)
 
 static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
 {
-	fpu->last_cpu = -1;
+	fpu->fpregs_cached = 0;
 }
 
 static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
 {
-	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && fpu->fpregs_cached;
 }
 
 /*
@@ -573,15 +573,16 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
 	if (old_fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
-			old_fpu->last_cpu = -1;
+			old_fpu->fpregs_cached = 0;
 		else
-			old_fpu->last_cpu = cpu;
+			old_fpu->fpregs_cached = 1;
 
 		/* But leave fpu_fpregs_owner_ctx! */
 		old_fpu->fpregs_active = 0;
 		trace_x86_fpu_regs_deactivated(old_fpu);
-	} else
-		old_fpu->last_cpu = -1;
+	} else {
+		old_fpu->fpregs_cached = 0;
+	}
 }
 
 /*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b9c09d..3090b0d7b232 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -276,20 +276,6 @@ union fpregs_state {
  */
 struct fpu {
 	/*
-	 * @last_cpu:
-	 *
-	 * Records the last CPU on which this context was loaded into
-	 * FPU registers. (In the lazy-restore case we might be
-	 * able to reuse FPU registers across multiple context switches
-	 * this way, if no intermediate task used the FPU.)
-	 *
-	 * A value of -1 is used to indicate that the FPU state in context
-	 * memory is newer than the FPU state in registers, and that the
-	 * FPU state should be reloaded next time the task is run.
-	 */
-	unsigned int			last_cpu;
-
-	/*
 	 * @fpstate_active:
 	 *
 	 * This flag indicates whether this context is active: if the task
@@ -322,6 +308,16 @@
 	unsigned char			fpregs_active;
 
 	/*
+	 * @fpregs_cached:
+	 *
+	 * This flag tells us whether this context's FPU registers are
+	 * still cached in the CPU.
+	 *
+	 * The flag is cleared when the task is migrated to another CPU.
+	 */
+	unsigned char			fpregs_cached;
+
+	/*
 	 * @state:
 	 *
 	 * In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index fcc5cd387fd1..a7146dadb31d 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -72,4 +72,14 @@ do {									\
 	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
 
+
+/*
+ * The task-migration arch callback clears the FPU registers cache:
+ */
+static inline void arch_task_migrate(struct task_struct *p)
+{
+	p->thread.fpu.fpregs_cached = 0;
+}
+#define arch_task_migrate arch_task_migrate
+
 #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f070c2d..287f1cb32b59 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
 	dst_fpu->fpregs_active = 0;
-	dst_fpu->last_cpu = -1;
+	dst_fpu->fpregs_cached = 0;
 
 	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c56fb57f2991..7eb2f3041fde 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1253,6 +1253,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		p->sched_class->migrate_task_rq(p);
 		p->se.nr_migrations++;
 		perf_event_task_migrate(p);
+
+		arch_task_migrate(p);
 	}
 
 	__set_task_cpu(p, new_cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7b34c7826ca5..ff8a894132e4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1824,3 +1824,11 @@ static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #else /* arch_scale_freq_capacity */
 #define arch_scale_freq_invariant()	(false)
 #endif
+
+/*
+ * Default task-migration arch callback:
+ */
+#ifndef arch_task_migrate
+static inline void arch_task_migrate(struct task_struct *p) { }
+#endif
+
-- 
2.7.4
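
[ Editorial postscript: the switch_to.h/sched.h pairing above uses the
  kernel's standard pattern for optional arch hooks: the arch header
  defines the function plus a same-named macro, and the generic header
  installs an empty default only when that macro is absent - #ifndef
  cannot see plain function declarations, only macros. A compressed,
  self-contained sketch of the pattern, with a hypothetical hook name:

	/* arch header: provide the hook and announce it to the preprocessor */
	static inline void arch_hook(void) { /* arch-specific work */ }
	#define arch_hook arch_hook

	/*
	 * generic header, included afterwards: no-op default for every
	 * architecture that did not define the macro above
	 */
	#ifndef arch_hook
	static inline void arch_hook(void) { }
	#endif
  ]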