From: Andi Kleen <ak@suse.de>
To: Dave Jones <davej@codemonkey.org.uk>,
"Martin J. Bligh" <mbligh@aracnet.com>,
ak@suse.de, linux-kernel <linux-kernel@vger.kernel.org>
Subject: Re: [Bug 350] New: i386 context switch very slow compared to 2.4 due to wrmsr (performance)
Date: Wed, 12 Feb 2003 08:50:48 +0100 [thread overview]
Message-ID: <20030212075048.GA9049@wotan.suse.de> (raw)
In-Reply-To: <20030212025902.GA14092@codemonkey.org.uk>
On Wed, Feb 12, 2003 at 02:59:02AM +0000, Dave Jones wrote:
> On Tue, Feb 11, 2003 at 05:35:43PM -0800, Martin J. Bligh wrote:
>
> > The reason it rewrites SYSENTER_CS is non obviously vm86 which
> > doesn't guarantee the MSR stays constant (sigh). I think this would
> > be better handled by having a global flag or process flag when any process
> > uses vm86 and not do it when this flag is not set (as in 99% of all
> > normal use cases)
>
> I feel I'm missing something obvious here, but is this part the
> low-hanging fruit that it seems ?
Yes, I implemented a similar patch last night too. It also fixes a few other
fast-path bugs in __switch_to:
- Fix false sharing in the GDT and replace an imul with a shift.
Really pad the GDT to cache lines now.
- Don't use LOCK prefixes in bit operations when accessing the
thread_info flags of the switched threads. LOCK is very slow on P4
and it isn't necessary here.
Really we should have __set_bit/__test_bit without memory barrier
and atomic stuff on each arch and use that for thread_info.h,
but for now do it this way.
[this is a port from x86-64]
- Inline FPU switch - it is only a few lines.
But I must say I don't know vm86() semantics well enough to know if this is
good enough, especially regarding when we clear the TIF_VM86 flag. Could someone
more familiar with it review it?
BTW vm86.c at first glance doesn't look very preempt-safe to me.
comments?
-Andi
diff -burpN -X ../KDIFX linux-2.5.60/arch/i386/kernel/cpu/common.c linux-2.5.60-work/arch/i386/kernel/cpu/common.c
--- linux-2.5.60/arch/i386/kernel/cpu/common.c 2003-02-10 19:37:57.000000000 +0100
+++ linux-2.5.60-work/arch/i386/kernel/cpu/common.c 2003-02-12 01:42:01.000000000 +0100
@@ -484,7 +484,7 @@ void __init cpu_init (void)
BUG();
enter_lazy_tlb(&init_mm, current, cpu);
- load_esp0(t, thread->esp0);
+ load_esp0(current, t, thread->esp0);
set_tss_desc(cpu,t);
cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
diff -burpN -X ../KDIFX linux-2.5.60/arch/i386/kernel/i387.c linux-2.5.60-work/arch/i386/kernel/i387.c
--- linux-2.5.60/arch/i386/kernel/i387.c 2003-02-10 19:39:17.000000000 +0100
+++ linux-2.5.60-work/arch/i386/kernel/i387.c 2003-02-11 23:51:58.000000000 +0100
@@ -52,24 +52,6 @@ void init_fpu(struct task_struct *tsk)
* FPU lazy state save handling.
*/
-static inline void __save_init_fpu( struct task_struct *tsk )
-{
- if ( cpu_has_fxsr ) {
- asm volatile( "fxsave %0 ; fnclex"
- : "=m" (tsk->thread.i387.fxsave) );
- } else {
- asm volatile( "fnsave %0 ; fwait"
- : "=m" (tsk->thread.i387.fsave) );
- }
- clear_tsk_thread_flag(tsk, TIF_USEDFPU);
-}
-
-void save_init_fpu( struct task_struct *tsk )
-{
- __save_init_fpu(tsk);
- stts();
-}
-
void kernel_fpu_begin(void)
{
preempt_disable();
diff -burpN -X ../KDIFX linux-2.5.60/arch/i386/kernel/process.c linux-2.5.60-work/arch/i386/kernel/process.c
--- linux-2.5.60/arch/i386/kernel/process.c 2003-02-10 19:37:54.000000000 +0100
+++ linux-2.5.60-work/arch/i386/kernel/process.c 2003-02-12 01:40:02.000000000 +0100
@@ -437,7 +437,7 @@ void __switch_to(struct task_struct *pre
/*
* Reload esp0, LDT and the page table pointer:
*/
- load_esp0(tss, next->esp0);
+ load_esp0(prev_p, tss, next->esp0);
/*
* Load the per-thread Thread-Local Storage descriptor.
diff -burpN -X ../KDIFX linux-2.5.60/arch/i386/kernel/vm86.c linux-2.5.60-work/arch/i386/kernel/vm86.c
--- linux-2.5.60/arch/i386/kernel/vm86.c 2003-02-10 19:37:58.000000000 +0100
+++ linux-2.5.60-work/arch/i386/kernel/vm86.c 2003-02-12 01:46:51.000000000 +0100
@@ -114,7 +117,7 @@ struct pt_regs * save_v86_state(struct k
}
tss = init_tss + smp_processor_id();
current->thread.esp0 = current->thread.saved_esp0;
- load_esp0(tss, current->thread.esp0);
+ load_esp0(current, tss, current->thread.esp0);
current->thread.saved_esp0 = 0;
loadsegment(fs, current->thread.saved_fs);
loadsegment(gs, current->thread.saved_gs);
@@ -309,6 +313,10 @@ static inline void return_to_32bit(struc
{
struct pt_regs * regs32;
+ /* FIXME should disable preemption here but how can we reenable it? */
+
+ enable_sysenter();
+
regs32 = save_v86_state(regs16);
regs32->eax = retval;
__asm__ __volatile__("movl %0,%%esp\n\t"
diff -burpN -X ../KDIFX linux-2.5.60/arch/x86_64/kernel/process.c linux-2.5.60-work/arch/x86_64/kernel/process.c
--- linux-2.5.60/arch/x86_64/kernel/process.c 2003-02-10 19:37:56.000000000 +0100
+++ linux-2.5.60-work/arch/x86_64/kernel/process.c 2003-02-12 01:51:00.000000000 +0100
@@ -41,6 +41,7 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/slab.h>
+#include <linux/thread_info.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
diff -burpN -X ../KDIFX linux-2.5.60/include/asm-i386/i387.h linux-2.5.60-work/include/asm-i386/i387.h
--- linux-2.5.60/include/asm-i386/i387.h 2003-02-10 19:38:49.000000000 +0100
+++ linux-2.5.60-work/include/asm-i386/i387.h 2003-02-12 01:21:13.000000000 +0100
@@ -21,23 +21,41 @@ extern void init_fpu(struct task_struct
/*
* FPU lazy state save handling...
*/
-extern void save_init_fpu( struct task_struct *tsk );
extern void restore_fpu( struct task_struct *tsk );
extern void kernel_fpu_begin(void);
#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+ if ( cpu_has_fxsr ) {
+ asm volatile( "fxsave %0 ; fnclex"
+ : "=m" (tsk->thread.i387.fxsave) );
+ } else {
+ asm volatile( "fnsave %0 ; fwait"
+ : "=m" (tsk->thread.i387.fsave) );
+ }
+ tsk->thread_info->flags &= ~_TIF_USEDFPU;
+}
+
+static inline void save_init_fpu( struct task_struct *tsk )
+{
+ __save_init_fpu(tsk);
+ stts();
+}
+
+
#define unlazy_fpu( tsk ) do { \
- if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) \
+ if ((tsk)->thread_info->flags & _TIF_USEDFPU) \
save_init_fpu( tsk ); \
} while (0)
#define clear_fpu( tsk ) \
do { \
- if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \
+ if ((tsk)->thread_info->flags & _TIF_USEDFPU) { \
asm volatile("fwait"); \
- clear_tsk_thread_flag(tsk, TIF_USEDFPU); \
+ (tsk)->thread_info->flags &= ~_TIF_USEDFPU; \
stts(); \
} \
} while (0)
diff -burpN -X ../KDIFX linux-2.5.60/include/asm-i386/processor.h linux-2.5.60-work/include/asm-i386/processor.h
--- linux-2.5.60/include/asm-i386/processor.h 2003-02-10 19:37:57.000000000 +0100
+++ linux-2.5.60-work/include/asm-i386/processor.h 2003-02-12 01:52:28.000000000 +0100
@@ -408,20 +408,30 @@ struct thread_struct {
.io_bitmap = { [ 0 ... IO_BITMAP_SIZE ] = ~0 }, \
}
-static inline void load_esp0(struct tss_struct *tss, unsigned long esp0)
-{
- tss->esp0 = esp0;
- if (cpu_has_sep) {
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, esp0, 0);
- }
-}
-
-static inline void disable_sysenter(void)
-{
- if (cpu_has_sep)
- wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
-}
+#define load_esp0(prev, tss, _esp0) do { \
+ (tss)->esp0 = _esp0; \
+ if (cpu_has_sep) { \
+ if (unlikely((prev)->thread_info->flags & _TIF_VM86)) \
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); \
+ wrmsr(MSR_IA32_SYSENTER_ESP, (_esp0), 0); \
+ } \
+} while(0)
+
+/* The caller of the next two functions should have disabled preemption. */
+
+#define disable_sysenter() do { \
+ if (cpu_has_sep) { \
+ set_thread_flag(TIF_VM86); \
+ wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); \
+ } \
+} while(0)
+
+#define enable_sysenter() do { \
+ if (cpu_has_sep) { \
+ clear_thread_flag(TIF_VM86); \
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); \
+ } \
+} while(0)
#define start_thread(regs, new_eip, new_esp) do { \
__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
diff -burpN -X ../KDIFX linux-2.5.60/include/asm-i386/segment.h linux-2.5.60-work/include/asm-i386/segment.h
--- linux-2.5.60/include/asm-i386/segment.h 2003-02-10 19:38:06.000000000 +0100
+++ linux-2.5.60-work/include/asm-i386/segment.h 2003-02-11 23:56:37.000000000 +0100
@@ -67,7 +67,7 @@
/*
* The GDT has 25 entries but we pad it to cacheline boundary:
*/
-#define GDT_ENTRIES 28
+#define GDT_ENTRIES 32
#define GDT_SIZE (GDT_ENTRIES * 8)
diff -burpN -X ../KDIFX linux-2.5.60/include/asm-i386/thread_info.h linux-2.5.60-work/include/asm-i386/thread_info.h
--- linux-2.5.60/include/asm-i386/thread_info.h 2003-02-10 19:37:59.000000000 +0100
+++ linux-2.5.60-work/include/asm-i386/thread_info.h 2003-02-12 01:51:26.000000000 +0100
@@ -111,15 +111,18 @@ static inline struct thread_info *curren
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
#define TIF_IRET 5 /* return with iret */
+#define TIF_VM86 6 /* may use vm86 */
#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_IRET (1<<TIF_IRET)
+#define _TIF_VM86 (1<<TIF_VM86)
#define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
next prev parent reply other threads:[~2003-02-12 7:41 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-02-12 1:35 [Bug 350] New: i386 context switch very slow compared to 2.4 due to wrmsr (performance) Martin J. Bligh
2003-02-12 2:59 ` Dave Jones
2003-02-12 4:21 ` Jamie Lokier
2003-02-12 5:49 ` Linus Torvalds
2003-02-12 10:12 ` Jamie Lokier
2003-03-10 3:07 ` Linus Torvalds
2003-03-10 11:06 ` Andi Kleen
2003-03-10 18:33 ` Linus Torvalds
2003-03-10 22:44 ` Linus Torvalds
2003-02-12 12:54 ` Dave Jones
2003-02-12 7:50 ` Andi Kleen [this message]
2003-02-12 10:27 ` Jamie Lokier
2003-02-12 10:45 ` Andi Kleen
2003-02-12 17:52 ` Ingo Oeser
2003-02-12 18:13 ` Dave Jones
2003-02-12 18:18 ` Andi Kleen
2003-02-13 2:42 ` Alan Cox
2003-02-13 5:17 ` Eric W. Biederman
2003-02-13 18:07 ` Andi Kleen
2003-02-14 0:14 ` [discuss] " Peter Tattam
2003-02-14 1:29 ` Andi Kleen
2003-02-14 1:51 ` Eric Northup
2003-02-14 2:01 ` Peter Tattam
2003-02-14 4:07 ` Thomas J. Merritt
2003-02-14 9:38 ` Peter Finderup Lund
2003-02-14 8:27 ` Eric W. Biederman
2003-03-19 1:22 ` Rob Landley
2003-02-12 4:18 ` Jamie Lokier
2003-02-12 5:54 ` Linus Torvalds
2003-02-12 10:18 ` Jamie Lokier
2003-02-12 17:24 ` Linus Torvalds
2003-03-18 15:24 ` Kevin Pedretti
2003-03-18 16:41 ` Linus Torvalds
2003-03-18 18:30 ` Brian Gerst
2003-03-18 19:14 ` Thomas Molina
2003-03-18 19:21 ` Linus Torvalds
2003-03-18 20:03 ` Thomas Schlichter
2003-03-18 20:24 ` Steven Cole
2003-03-19 0:42 ` H. Peter Anvin
2003-03-19 2:22 ` george anzinger
[not found] <20030318165013$55f4@gated-at.bofh.it>
[not found] ` <20030318184010$6448@gated-at.bofh.it>
2003-03-18 20:19 ` Pascal Schmidt
2003-03-19 9:55 Ph. Marek
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20030212075048.GA9049@wotan.suse.de \
--to=ak@suse.de \
--cc=davej@codemonkey.org.uk \
--cc=linux-kernel@vger.kernel.org \
--cc=mbligh@aracnet.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).