All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] x86: capabilities handling adjustments
@ 2015-10-14 13:22 Jan Beulich
  2015-10-14 13:29 ` [PATCH 1/4] x86: remove unused x87 remnants of 32-bit days Jan Beulich
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Jan Beulich @ 2015-10-14 13:22 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

1: remove unused x87 remnants of 32-bit days
2: drop further constant cpu_has_* predicates
3: don't use FPU instructions when there is no FPU
4: capabilities: set/clear them using non-locked bitops

Signed-off-by: Jan Beulich <jbeulich@suse.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/4] x86: remove unused x87 remnants of 32-bit days
  2015-10-14 13:22 [PATCH 0/4] x86: capabilities handling adjustments Jan Beulich
@ 2015-10-14 13:29 ` Jan Beulich
  2015-10-14 13:57   ` Andrew Cooper
  2015-10-14 13:30 ` [PATCH 2/4] x86: drop further constant cpu_has_* predicates Jan Beulich
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2015-10-14 13:29 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 5396 bytes --]

x86-64 requires FXSR, XMM, and XMM2, so there's no point in hiding
respective code behind conditionals.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1542,8 +1542,7 @@ static int hvmemul_get_fpu(
             return X86EMUL_UNHANDLEABLE;
         break;
     case X86EMUL_FPU_xmm:
-        if ( !cpu_has_xmm ||
-             (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) ||
+        if ( (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) ||
              !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSFXSR) )
             return X86EMUL_UNHANDLEABLE;
         break;
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -19,15 +19,12 @@
 
 static void fpu_init(void)
 {
-    unsigned long val;
+    uint32_t val = MXCSR_DEFAULT;
     
     asm volatile ( "fninit" );
-    if ( cpu_has_xmm )
-    {
-        /* load default value into MXCSR control/status register */
-        val = MXCSR_DEFAULT;
-        asm volatile ( "ldmxcsr %0" : : "m" (val) );
-    }
+
+    /* load default value into MXCSR control/status register */
+    asm volatile ( "ldmxcsr %0" : : "m" (val) );
 }
 
 /*******************************/
@@ -122,14 +119,6 @@ static inline void fpu_fxrstor(struct vc
     }
 }
 
-/* Restore x87 extended state */
-static inline void fpu_frstor(struct vcpu *v)
-{
-    const char *fpu_ctxt = v->arch.fpu_ctxt;
-
-    asm volatile ( "frstor %0" : : "m" (*fpu_ctxt) );
-}
-
 /*******************************/
 /*      FPU Save Functions     */
 /*******************************/
@@ -206,15 +195,6 @@ static inline void fpu_fxsave(struct vcp
         fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = word_size;
 }
 
-/* Save x87 FPU state */
-static inline void fpu_fsave(struct vcpu *v)
-{
-    char *fpu_ctxt = v->arch.fpu_ctxt;
-
-    /* FWAIT is required to make FNSAVE synchronous. */
-    asm volatile ( "fnsave %0 ; fwait" : "=m" (*fpu_ctxt) );
-}
-
 /*******************************/
 /*       VCPU FPU Functions    */
 /*******************************/
@@ -249,12 +229,7 @@ void vcpu_restore_fpu_lazy(struct vcpu *
     if ( cpu_has_xsave )
         fpu_xrstor(v, XSTATE_LAZY);
     else if ( v->fpu_initialised )
-    {
-        if ( cpu_has_fxsr )
-            fpu_fxrstor(v);
-        else
-            fpu_frstor(v);
-    }
+        fpu_fxrstor(v);
     else
         fpu_init();
 
@@ -278,10 +253,8 @@ static bool_t _vcpu_save_fpu(struct vcpu
 
     if ( cpu_has_xsave )
         fpu_xsave(v);
-    else if ( cpu_has_fxsr )
-        fpu_fxsave(v);
     else
-        fpu_fsave(v);
+        fpu_fxsave(v);
 
     v->fpu_dirtied = 0;
 
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1295,10 +1295,8 @@ void __init noreturn __start_xen(unsigne
 
     identify_cpu(&boot_cpu_data);
 
-    if ( cpu_has_fxsr )
-        set_in_cr4(X86_CR4_OSFXSR);
-    if ( cpu_has_xmm )
-        set_in_cr4(X86_CR4_OSXMMEXCPT);
+    set_in_cr4(X86_CR4_OSFXSR);
+    set_in_cr4(X86_CR4_OSXMMEXCPT);
 
     if ( disable_smep )
         setup_clear_cpu_cap(X86_FEATURE_SMEP);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -184,9 +184,6 @@
 #define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr		1
 #define cpu_has_mmx		1
-#define cpu_has_fxsr		1
-#define cpu_has_xmm		1
-#define cpu_has_xmm2		1
 #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_syscall		1
--- a/xen/include/asm-x86/i387.h
+++ b/xen/include/asm-x86/i387.h
@@ -17,22 +17,15 @@
 /* Byte offset of the stored word size within the FXSAVE area/portion. */
 #define FPU_WORD_SIZE_OFFSET 511
 
-struct ix87_state {
-    struct ix87_env {
-        uint16_t fcw, _res0;
-        uint16_t fsw, _res1;
-        uint16_t ftw, _res2;
-        uint32_t fip;
-        uint16_t fcs;
-        uint16_t fop;
-        uint32_t fdp;
-        uint16_t fds, _res6;
-    } env;
-    struct __packed ix87_reg {
-        uint64_t mantissa;
-        uint16_t exponent:15;
-        uint16_t sign:1;
-    } r[8];
+struct ix87_env {
+    uint16_t fcw, _res0;
+    uint16_t fsw, _res1;
+    uint16_t ftw, _res2;
+    uint32_t fip;
+    uint16_t fcs;
+    uint16_t fop;
+    uint32_t fdp;
+    uint16_t fds, _res6;
 };
 
 void vcpu_restore_fpu_eager(struct vcpu *v);
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -206,13 +206,10 @@ typedef struct { u64 pfn; } pagetable_t;
 #define pagetable_null()        pagetable_from_pfn(0)
 
 void clear_page_sse2(void *);
-#define clear_page(_p)      (cpu_has_xmm2 ?                             \
-                             clear_page_sse2((void *)(_p)) :            \
-                             (void)memset((void *)(_p), 0, PAGE_SIZE))
 void copy_page_sse2(void *, const void *);
-#define copy_page(_t,_f)    (cpu_has_xmm2 ?                             \
-                             copy_page_sse2(_t, _f) :                   \
-                             (void)memcpy(_t, _f, PAGE_SIZE))
+
+#define clear_page(_p)      clear_page_sse2(_p)
+#define copy_page(_t,_f)    copy_page_sse2(_t, _f)
 
 /* Convert between Xen-heap virtual addresses and machine addresses. */
 #define __pa(x)             (virt_to_maddr(x))



[-- Attachment #2: x86-remove-x87.patch --]
[-- Type: text/plain, Size: 5442 bytes --]

x86: remove unused x87 remnants of 32-bit days

x86-64 requires FXSR, XMM, and XMM2, so there's no point in hiding
respective code behind conditionals.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1542,8 +1542,7 @@ static int hvmemul_get_fpu(
             return X86EMUL_UNHANDLEABLE;
         break;
     case X86EMUL_FPU_xmm:
-        if ( !cpu_has_xmm ||
-             (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) ||
+        if ( (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) ||
              !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSFXSR) )
             return X86EMUL_UNHANDLEABLE;
         break;
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -19,15 +19,12 @@
 
 static void fpu_init(void)
 {
-    unsigned long val;
+    uint32_t val = MXCSR_DEFAULT;
     
     asm volatile ( "fninit" );
-    if ( cpu_has_xmm )
-    {
-        /* load default value into MXCSR control/status register */
-        val = MXCSR_DEFAULT;
-        asm volatile ( "ldmxcsr %0" : : "m" (val) );
-    }
+
+    /* load default value into MXCSR control/status register */
+    asm volatile ( "ldmxcsr %0" : : "m" (val) );
 }
 
 /*******************************/
@@ -122,14 +119,6 @@ static inline void fpu_fxrstor(struct vc
     }
 }
 
-/* Restore x87 extended state */
-static inline void fpu_frstor(struct vcpu *v)
-{
-    const char *fpu_ctxt = v->arch.fpu_ctxt;
-
-    asm volatile ( "frstor %0" : : "m" (*fpu_ctxt) );
-}
-
 /*******************************/
 /*      FPU Save Functions     */
 /*******************************/
@@ -206,15 +195,6 @@ static inline void fpu_fxsave(struct vcp
         fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = word_size;
 }
 
-/* Save x87 FPU state */
-static inline void fpu_fsave(struct vcpu *v)
-{
-    char *fpu_ctxt = v->arch.fpu_ctxt;
-
-    /* FWAIT is required to make FNSAVE synchronous. */
-    asm volatile ( "fnsave %0 ; fwait" : "=m" (*fpu_ctxt) );
-}
-
 /*******************************/
 /*       VCPU FPU Functions    */
 /*******************************/
@@ -249,12 +229,7 @@ void vcpu_restore_fpu_lazy(struct vcpu *
     if ( cpu_has_xsave )
         fpu_xrstor(v, XSTATE_LAZY);
     else if ( v->fpu_initialised )
-    {
-        if ( cpu_has_fxsr )
-            fpu_fxrstor(v);
-        else
-            fpu_frstor(v);
-    }
+        fpu_fxrstor(v);
     else
         fpu_init();
 
@@ -278,10 +253,8 @@ static bool_t _vcpu_save_fpu(struct vcpu
 
     if ( cpu_has_xsave )
         fpu_xsave(v);
-    else if ( cpu_has_fxsr )
-        fpu_fxsave(v);
     else
-        fpu_fsave(v);
+        fpu_fxsave(v);
 
     v->fpu_dirtied = 0;
 
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1295,10 +1295,8 @@ void __init noreturn __start_xen(unsigne
 
     identify_cpu(&boot_cpu_data);
 
-    if ( cpu_has_fxsr )
-        set_in_cr4(X86_CR4_OSFXSR);
-    if ( cpu_has_xmm )
-        set_in_cr4(X86_CR4_OSXMMEXCPT);
+    set_in_cr4(X86_CR4_OSFXSR);
+    set_in_cr4(X86_CR4_OSXMMEXCPT);
 
     if ( disable_smep )
         setup_clear_cpu_cap(X86_FEATURE_SMEP);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -184,9 +184,6 @@
 #define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr		1
 #define cpu_has_mmx		1
-#define cpu_has_fxsr		1
-#define cpu_has_xmm		1
-#define cpu_has_xmm2		1
 #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_syscall		1
--- a/xen/include/asm-x86/i387.h
+++ b/xen/include/asm-x86/i387.h
@@ -17,22 +17,15 @@
 /* Byte offset of the stored word size within the FXSAVE area/portion. */
 #define FPU_WORD_SIZE_OFFSET 511
 
-struct ix87_state {
-    struct ix87_env {
-        uint16_t fcw, _res0;
-        uint16_t fsw, _res1;
-        uint16_t ftw, _res2;
-        uint32_t fip;
-        uint16_t fcs;
-        uint16_t fop;
-        uint32_t fdp;
-        uint16_t fds, _res6;
-    } env;
-    struct __packed ix87_reg {
-        uint64_t mantissa;
-        uint16_t exponent:15;
-        uint16_t sign:1;
-    } r[8];
+struct ix87_env {
+    uint16_t fcw, _res0;
+    uint16_t fsw, _res1;
+    uint16_t ftw, _res2;
+    uint32_t fip;
+    uint16_t fcs;
+    uint16_t fop;
+    uint32_t fdp;
+    uint16_t fds, _res6;
 };
 
 void vcpu_restore_fpu_eager(struct vcpu *v);
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -206,13 +206,10 @@ typedef struct { u64 pfn; } pagetable_t;
 #define pagetable_null()        pagetable_from_pfn(0)
 
 void clear_page_sse2(void *);
-#define clear_page(_p)      (cpu_has_xmm2 ?                             \
-                             clear_page_sse2((void *)(_p)) :            \
-                             (void)memset((void *)(_p), 0, PAGE_SIZE))
 void copy_page_sse2(void *, const void *);
-#define copy_page(_t,_f)    (cpu_has_xmm2 ?                             \
-                             copy_page_sse2(_t, _f) :                   \
-                             (void)memcpy(_t, _f, PAGE_SIZE))
+
+#define clear_page(_p)      clear_page_sse2(_p)
+#define copy_page(_t,_f)    copy_page_sse2(_t, _f)
 
 /* Convert between Xen-heap virtual addresses and machine addresses. */
 #define __pa(x)             (virt_to_maddr(x))

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/4] x86: drop further constant cpu_has_* predicates
  2015-10-14 13:22 [PATCH 0/4] x86: capabilities handling adjustments Jan Beulich
  2015-10-14 13:29 ` [PATCH 1/4] x86: remove unused x87 remnants of 32-bit days Jan Beulich
@ 2015-10-14 13:30 ` Jan Beulich
  2015-10-14 14:02   ` Andrew Cooper
  2015-10-14 13:31 ` [PATCH 3/4] x86: don't use FPU instructions when there is no FPU Jan Beulich
  2015-10-14 13:31 ` [PATCH 4/4] x86/capabilities: set/clear them using non-locked bitops Jan Beulich
  3 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2015-10-14 13:30 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 5356 bytes --]

EFER and SYSCALL are required on x86-64, and I think there's no point
in assuming there might be no TSC. A few other predicates are simply
unused. Of the ones left but constant I'm not convinced we should drop
them; for some of them we may actually better make them non-constant
(see also next patch in this series).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -194,8 +194,7 @@ static int enter_state(u32 state)
     /* Restore CR4 and EFER from cached values. */
     cr4 = read_cr4();
     write_cr4(cr4 & ~X86_CR4_MCE);
-    if ( cpu_has_efer )
-        write_efer(read_efer());
+    write_efer(read_efer());
 
     device_power_up();
 
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1130,7 +1130,7 @@ static void __devinit setup_APIC_timer(v
 
 static int __init calibrate_APIC_clock(void)
 {
-    unsigned long long t1 = 0, t2 = 0;
+    unsigned long long t1, t2;
     long tt1, tt2;
     long result;
     int i;
@@ -1157,8 +1157,7 @@ static int __init calibrate_APIC_clock(v
     /*
      * We wrapped around just now. Let's start:
      */
-    if (cpu_has_tsc)
-        t1 = rdtsc();
+    t1 = rdtsc();
     tt1 = apic_read(APIC_TMCCT);
 
     /*
@@ -1168,8 +1167,7 @@ static int __init calibrate_APIC_clock(v
         wait_8254_wraparound();
 
     tt2 = apic_read(APIC_TMCCT);
-    if (cpu_has_tsc)
-        t2 = rdtsc();
+    t2 = rdtsc();
 
     /*
      * The APIC bus clock counter is 32 bits only, it
@@ -1181,16 +1179,12 @@ static int __init calibrate_APIC_clock(v
 
     result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
 
-    if (cpu_has_tsc)
-        apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-                    "%ld.%04ld MHz.\n",
-                    ((long)(t2-t1)/LOOPS)/(1000000/HZ),
-                    ((long)(t2-t1)/LOOPS)%(1000000/HZ));
-
-    apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-                "%ld.%04ld MHz.\n",
-                result/(1000000/HZ),
-                result%(1000000/HZ));
+    apic_printk(APIC_VERBOSE, "..... CPU clock speed is %ld.%04ld MHz.\n",
+                ((long)(t2 - t1) / LOOPS) / (1000000 / HZ),
+                ((long)(t2 - t1) / LOOPS) % (1000000 / HZ));
+
+    apic_printk(APIC_VERBOSE, "..... host bus clock speed is %ld.%04ld MHz.\n",
+                result / (1000000 / HZ), result % (1000000 / HZ));
 
     /* set up multipliers for accurate timer code */
     bus_freq   = result*HZ;
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -86,17 +86,13 @@ trampoline_protmode_entry:
 
         /* Set up EFER (Extended Feature Enable Register). */
         mov     bootsym_rel(cpuid_ext_features,4,%edi)
-        test    $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
-        jz      .Lskip_efer
         movl    $MSR_EFER,%ecx
         rdmsr
-        btsl    $_EFER_LME,%eax /* Long Mode      */
-        btsl    $_EFER_SCE,%eax /* SYSCALL/SYSRET */
-        btl     $20,%edi        /* No Execute?    */
+        or      $EFER_LME|EFER_SCE,%eax   /* Long Mode + SYSCALL/SYSRET */
+        bt      $X86_FEATURE_NX % 32,%edi /* No Execute? */
         jnc     1f
         btsl    $_EFER_NX,%eax  /* No Execute     */
 1:      wrmsr
-.Lskip_efer:
 
         mov     $(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_NE |\
                   X86_CR0_ET | X86_CR0_MP | X86_CR0_PE), %eax
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -625,8 +625,7 @@ void __init noreturn __start_xen(unsigne
 
     parse_video_info();
 
-    if ( cpu_has_efer )
-        rdmsrl(MSR_EFER, this_cpu(efer));
+    rdmsrl(MSR_EFER, this_cpu(efer));
     asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );
 
     /* We initialise the serial devices very early so we can get debugging. */
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -332,8 +332,7 @@ void start_secondary(void *unused)
     set_processor_id(cpu);
     set_current(idle_vcpu[cpu]);
     this_cpu(curr_vcpu) = idle_vcpu[cpu];
-    if ( cpu_has_efer )
-        rdmsrl(MSR_EFER, this_cpu(efer));
+    rdmsrl(MSR_EFER, this_cpu(efer));
 
     /*
      * Just as during early bootstrap, it is convenient here to disable
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -174,10 +174,8 @@
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
 #define CPUID5_ECX_INTERRUPT_BREAK      0x2
 
-#define cpu_has_vme		0
 #define cpu_has_de		1
 #define cpu_has_pse		1
-#define cpu_has_tsc		1
 #define cpu_has_pge		1
 #define cpu_has_pat		1
 #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
@@ -186,15 +184,10 @@
 #define cpu_has_mmx		1
 #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
-#define cpu_has_syscall		1
 #define cpu_has_mp		1
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
-#define cpu_has_k6_mtrr		0
-#define cpu_has_cyrix_arr	0
-#define cpu_has_centaur_mcr	0
 #define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
 #define cpu_has_page1gb		boot_cpu_has(X86_FEATURE_PAGE1GB)
-#define cpu_has_efer		1
 #define cpu_has_fsgsbase	boot_cpu_has(X86_FEATURE_FSGSBASE)
 
 #define cpu_has_smep            boot_cpu_has(X86_FEATURE_SMEP)



[-- Attachment #2: x86-remove-efer-checks.patch --]
[-- Type: text/plain, Size: 5403 bytes --]

x86: drop further constant cpu_has_* predicates

EFER and SYSCALL are required on x86-64, and I think there's no point
in assuming there might be no TSC. A few other predicates are simply
unused. Of the ones left but constant I'm not convinced we should drop
them; for some of them we may actually better make them non-constant
(see also next patch in this series).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -194,8 +194,7 @@ static int enter_state(u32 state)
     /* Restore CR4 and EFER from cached values. */
     cr4 = read_cr4();
     write_cr4(cr4 & ~X86_CR4_MCE);
-    if ( cpu_has_efer )
-        write_efer(read_efer());
+    write_efer(read_efer());
 
     device_power_up();
 
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1130,7 +1130,7 @@ static void __devinit setup_APIC_timer(v
 
 static int __init calibrate_APIC_clock(void)
 {
-    unsigned long long t1 = 0, t2 = 0;
+    unsigned long long t1, t2;
     long tt1, tt2;
     long result;
     int i;
@@ -1157,8 +1157,7 @@ static int __init calibrate_APIC_clock(v
     /*
      * We wrapped around just now. Let's start:
      */
-    if (cpu_has_tsc)
-        t1 = rdtsc();
+    t1 = rdtsc();
     tt1 = apic_read(APIC_TMCCT);
 
     /*
@@ -1168,8 +1167,7 @@ static int __init calibrate_APIC_clock(v
         wait_8254_wraparound();
 
     tt2 = apic_read(APIC_TMCCT);
-    if (cpu_has_tsc)
-        t2 = rdtsc();
+    t2 = rdtsc();
 
     /*
      * The APIC bus clock counter is 32 bits only, it
@@ -1181,16 +1179,12 @@ static int __init calibrate_APIC_clock(v
 
     result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
 
-    if (cpu_has_tsc)
-        apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-                    "%ld.%04ld MHz.\n",
-                    ((long)(t2-t1)/LOOPS)/(1000000/HZ),
-                    ((long)(t2-t1)/LOOPS)%(1000000/HZ));
-
-    apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-                "%ld.%04ld MHz.\n",
-                result/(1000000/HZ),
-                result%(1000000/HZ));
+    apic_printk(APIC_VERBOSE, "..... CPU clock speed is %ld.%04ld MHz.\n",
+                ((long)(t2 - t1) / LOOPS) / (1000000 / HZ),
+                ((long)(t2 - t1) / LOOPS) % (1000000 / HZ));
+
+    apic_printk(APIC_VERBOSE, "..... host bus clock speed is %ld.%04ld MHz.\n",
+                result / (1000000 / HZ), result % (1000000 / HZ));
 
     /* set up multipliers for accurate timer code */
     bus_freq   = result*HZ;
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -86,17 +86,13 @@ trampoline_protmode_entry:
 
         /* Set up EFER (Extended Feature Enable Register). */
         mov     bootsym_rel(cpuid_ext_features,4,%edi)
-        test    $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
-        jz      .Lskip_efer
         movl    $MSR_EFER,%ecx
         rdmsr
-        btsl    $_EFER_LME,%eax /* Long Mode      */
-        btsl    $_EFER_SCE,%eax /* SYSCALL/SYSRET */
-        btl     $20,%edi        /* No Execute?    */
+        or      $EFER_LME|EFER_SCE,%eax   /* Long Mode + SYSCALL/SYSRET */
+        bt      $X86_FEATURE_NX % 32,%edi /* No Execute? */
         jnc     1f
         btsl    $_EFER_NX,%eax  /* No Execute     */
 1:      wrmsr
-.Lskip_efer:
 
         mov     $(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_NE |\
                   X86_CR0_ET | X86_CR0_MP | X86_CR0_PE), %eax
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -625,8 +625,7 @@ void __init noreturn __start_xen(unsigne
 
     parse_video_info();
 
-    if ( cpu_has_efer )
-        rdmsrl(MSR_EFER, this_cpu(efer));
+    rdmsrl(MSR_EFER, this_cpu(efer));
     asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );
 
     /* We initialise the serial devices very early so we can get debugging. */
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -332,8 +332,7 @@ void start_secondary(void *unused)
     set_processor_id(cpu);
     set_current(idle_vcpu[cpu]);
     this_cpu(curr_vcpu) = idle_vcpu[cpu];
-    if ( cpu_has_efer )
-        rdmsrl(MSR_EFER, this_cpu(efer));
+    rdmsrl(MSR_EFER, this_cpu(efer));
 
     /*
      * Just as during early bootstrap, it is convenient here to disable
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -174,10 +174,8 @@
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
 #define CPUID5_ECX_INTERRUPT_BREAK      0x2
 
-#define cpu_has_vme		0
 #define cpu_has_de		1
 #define cpu_has_pse		1
-#define cpu_has_tsc		1
 #define cpu_has_pge		1
 #define cpu_has_pat		1
 #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
@@ -186,15 +184,10 @@
 #define cpu_has_mmx		1
 #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
-#define cpu_has_syscall		1
 #define cpu_has_mp		1
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
-#define cpu_has_k6_mtrr		0
-#define cpu_has_cyrix_arr	0
-#define cpu_has_centaur_mcr	0
 #define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
 #define cpu_has_page1gb		boot_cpu_has(X86_FEATURE_PAGE1GB)
-#define cpu_has_efer		1
 #define cpu_has_fsgsbase	boot_cpu_has(X86_FEATURE_FSGSBASE)
 
 #define cpu_has_smep            boot_cpu_has(X86_FEATURE_SMEP)

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 3/4] x86: don't use FPU instructions when there is no FPU
  2015-10-14 13:22 [PATCH 0/4] x86: capabilities handling adjustments Jan Beulich
  2015-10-14 13:29 ` [PATCH 1/4] x86: remove unused x87 remnants of 32-bit days Jan Beulich
  2015-10-14 13:30 ` [PATCH 2/4] x86: drop further constant cpu_has_* predicates Jan Beulich
@ 2015-10-14 13:31 ` Jan Beulich
  2015-10-14 14:30   ` Andrew Cooper
  2015-10-14 13:31 ` [PATCH 4/4] x86/capabilities: set/clear them using non-locked bitops Jan Beulich
  3 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2015-10-14 13:31 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 1970 bytes --]

One of the many things we need to do if we want to eventually be able
to run on e.g. Xeon Phi.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -261,6 +261,9 @@ static void __cpuinit generic_identify(s
 		cpuid_count(0x00000007, 0, &dummy, &ebx, &dummy, &dummy);
 		c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx;
 	}
+
+	if ( !cpu_has_fpu )
+		__set_bit(X86_FEATURE_NO_FPU_SEL, c->x86_capability);
 }
 
 /*
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -21,7 +21,8 @@ static void fpu_init(void)
 {
     uint32_t val = MXCSR_DEFAULT;
     
-    asm volatile ( "fninit" );
+    if ( cpu_has_fpu || cpu_has_mmx )
+        asm volatile ( "fninit" );
 
     /* load default value into MXCSR control/status register */
     asm volatile ( "ldmxcsr %0" : : "m" (val) );
@@ -59,7 +60,7 @@ static inline void fpu_fxrstor(struct vc
      * sometimes new user value. Both should be ok. Use the FPU saved
      * data block as a safe address because it should be in L1.
      */
-    if ( !(fpu_ctxt->fsw & 0x0080) &&
+    if ( cpu_has_fpu && !(fpu_ctxt->fsw & 0x0080) &&
          boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
     {
         asm volatile ( "fnclex\n\t"
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -170,6 +170,7 @@
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
 #define CPUID5_ECX_INTERRUPT_BREAK      0x2
 
+#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
 #define cpu_has_de		1
 #define cpu_has_pse		1
 #define cpu_has_pge		1
@@ -177,7 +178,7 @@
 #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
 #define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr		1
-#define cpu_has_mmx		1
+#define cpu_has_mmx		boot_cpu_has(X86_FEATURE_MMX)
 #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		1




[-- Attachment #2: x86-check-FPU-MMX.patch --]
[-- Type: text/plain, Size: 2020 bytes --]

x86: don't use FPU instructions when there is no FPU

One of the many things we need to do if we want to eventually be able
to run on e.g. Xeon Phi.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -261,6 +261,9 @@ static void __cpuinit generic_identify(s
 		cpuid_count(0x00000007, 0, &dummy, &ebx, &dummy, &dummy);
 		c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx;
 	}
+
+	if ( !cpu_has_fpu )
+		__set_bit(X86_FEATURE_NO_FPU_SEL, c->x86_capability);
 }
 
 /*
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -21,7 +21,8 @@ static void fpu_init(void)
 {
     uint32_t val = MXCSR_DEFAULT;
     
-    asm volatile ( "fninit" );
+    if ( cpu_has_fpu || cpu_has_mmx )
+        asm volatile ( "fninit" );
 
     /* load default value into MXCSR control/status register */
     asm volatile ( "ldmxcsr %0" : : "m" (val) );
@@ -59,7 +60,7 @@ static inline void fpu_fxrstor(struct vc
      * sometimes new user value. Both should be ok. Use the FPU saved
      * data block as a safe address because it should be in L1.
      */
-    if ( !(fpu_ctxt->fsw & 0x0080) &&
+    if ( cpu_has_fpu && !(fpu_ctxt->fsw & 0x0080) &&
          boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
     {
         asm volatile ( "fnclex\n\t"
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -170,6 +170,7 @@
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
 #define CPUID5_ECX_INTERRUPT_BREAK      0x2
 
+#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
 #define cpu_has_de		1
 #define cpu_has_pse		1
 #define cpu_has_pge		1
@@ -177,7 +178,7 @@
 #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
 #define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr		1
-#define cpu_has_mmx		1
+#define cpu_has_mmx		boot_cpu_has(X86_FEATURE_MMX)
 #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		1

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 4/4] x86/capabilities: set/clear them using non-locked bitops
  2015-10-14 13:22 [PATCH 0/4] x86: capabilities handling adjustments Jan Beulich
                   ` (2 preceding siblings ...)
  2015-10-14 13:31 ` [PATCH 3/4] x86: don't use FPU instructions when there is no FPU Jan Beulich
@ 2015-10-14 13:31 ` Jan Beulich
  2015-10-14 14:36   ` Andrew Cooper
  3 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2015-10-14 13:31 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 7734 bytes --]

Their initialization happens without races, so there's no point in
using atomic (locked) operations to update the respective flags.
(There's one case where the clear_bit() was completely pointless.)

Also drop a neighboring stale comment from AMD code.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -872,7 +872,7 @@ static int __init detect_init_APIC (void
         return -1;
     }
 
-    set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+    __set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
     mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
 
     /* The BIOS may have set up the APIC at some other address */
@@ -1369,7 +1369,7 @@ void pmu_apic_interrupt(struct cpu_user_
 int __init APIC_init_uniprocessor (void)
 {
     if (enable_local_apic < 0)
-        clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+        __clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
 
     if (!smp_found_config && !cpu_has_apic) {
         skip_ioapic_setup = 1;
@@ -1382,7 +1382,6 @@ int __init APIC_init_uniprocessor (void)
     if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
         printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
                boot_cpu_physical_apicid);
-        clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
         skip_ioapic_setup = 1;
         return -1;
     }
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -442,15 +442,9 @@ static void __devinit init_amd(struct cp
 		wrmsrl(MSR_K7_HWCR, value);
 	}
 
-	/*
-	 *	FIXME: We should handle the K5 here. Set up the write
-	 *	range and also turn on MSR 83 bits 4 and 31 (write alloc,
-	 *	no bus pipeline)
-	 */
-
 	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
 	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
-	clear_bit(0*32+31, c->x86_capability);
+	__clear_bit(0*32+31, c->x86_capability);
 	
 	if (c->x86 == 0xf && c->x86_model < 0x14
 	    && cpu_has(c, X86_FEATURE_LAHF_LM)) {
@@ -459,7 +453,7 @@ static void __devinit init_amd(struct cp
 		 * revision D (model = 0x14) and later actually support it.
 		 * (AMD Erratum #110, docId: 25759).
 		 */
-		clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability);
+		__clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability);
 		if (!rdmsr_amd_safe(0xc001100d, &l, &h))
 			wrmsr_amd_safe(0xc001100d, l, h & ~1);
 	}
@@ -482,10 +476,11 @@ static void __devinit init_amd(struct cp
 	if (c->extended_cpuid_level >= 0x80000007) {
 		c->x86_power = cpuid_edx(0x80000007);
 		if (c->x86_power & (1<<8)) {
-			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-			set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+			__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+			__set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
 			if (c->x86 != 0x11)
-				set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
+				__set_bit(X86_FEATURE_TSC_RELIABLE,
+					  c->x86_capability);
 		}
 	}
 
@@ -498,7 +493,7 @@ static void __devinit init_amd(struct cp
 		wrmsr_safe(MSR_K8_EXT_FEATURE_MASK, value);
 		rdmsrl(MSR_K8_EXT_FEATURE_MASK, value);
 		if (value & (1ULL << 54)) {
-			set_bit(X86_FEATURE_TOPOEXT, c->x86_capability);
+			__set_bit(X86_FEATURE_TOPOEXT, c->x86_capability);
 			printk(KERN_INFO "CPU: Re-enabling disabled "
 			       "Topology Extensions Support\n");
 		}
@@ -516,7 +511,7 @@ static void __devinit init_amd(struct cp
 
 	/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
 	if (c->x86 >= 0x10 && !force_mwait)
-		clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
+		__clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
 
 	if (!cpu_has_amd_erratum(c, AMD_ERRATUM_121))
 		opt_allow_unsafe = 1;
@@ -566,7 +561,7 @@ static void __devinit init_amd(struct cp
 	}
 
 	/* AMD CPUs do not support SYSENTER outside of legacy mode. */
-	clear_bit(X86_FEATURE_SEP, c->x86_capability);
+	__clear_bit(X86_FEATURE_SEP, c->x86_capability);
 
 	if (c->x86 == 0x10) {
 		/* do this for boot cpu */
@@ -592,7 +587,7 @@ static void __devinit init_amd(struct cp
 	 * running in deep C states.
 	 */
 	if ( opt_arat && c->x86 > 0x11 )
-		set_bit(X86_FEATURE_ARAT, c->x86_capability);
+		__set_bit(X86_FEATURE_ARAT, c->x86_capability);
 
 	/*
 	 * Prior to Family 0x14, perf counters are not reset during warm reboot.
--- a/xen/arch/x86/cpu/centaur.c
+++ b/xen/arch/x86/cpu/centaur.c
@@ -47,7 +47,7 @@ static void __init init_c3(struct cpuinf
 
 	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
-		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
 	}
 
 	get_model_name(c);
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -312,7 +312,7 @@ void __cpuinit identify_cpu(struct cpuin
 
         /* Initialize xsave/xrstor features */
 	if ( !use_xsave )
-		clear_bit(X86_FEATURE_XSAVE, boot_cpu_data.x86_capability);
+		__clear_bit(X86_FEATURE_XSAVE, boot_cpu_data.x86_capability);
 
 	if ( cpu_has_xsave )
 		xstate_init(c);
@@ -392,7 +392,7 @@ void __cpuinit detect_extended_topology(
 	if ( ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE) )
 		return;
 
-	set_bit(X86_FEATURE_XTOPOLOGY, c->x86_capability);
+	__set_bit(X86_FEATURE_XTOPOLOGY, c->x86_capability);
 
 	initial_apicid = edx;
 
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -209,7 +209,7 @@ static void __devinit Intel_errata_worka
 
 	if (c->x86 == 6 && cpu_has_clflush &&
 	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
-		set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
+		__set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
 }
 
 
@@ -244,7 +244,7 @@ static void __devinit init_intel(struct 
 		unsigned eax = cpuid_eax(10);
 		/* Check for version and the number of counters */
 		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
-			set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+			__set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
 	}
 
 	if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
@@ -255,10 +255,11 @@ static void __devinit init_intel(struct 
 
 	if (c == &boot_cpu_data && c->x86 == 6) {
 		if (probe_intel_cpuid_faulting())
-			set_bit(X86_FEATURE_CPUID_FAULTING, c->x86_capability);
+			__set_bit(X86_FEATURE_CPUID_FAULTING,
+				  c->x86_capability);
 	} else if (boot_cpu_has(X86_FEATURE_CPUID_FAULTING)) {
 		BUG_ON(!probe_intel_cpuid_faulting());
-		set_bit(X86_FEATURE_CPUID_FAULTING, c->x86_capability);
+		__set_bit(X86_FEATURE_CPUID_FAULTING, c->x86_capability);
 	}
 
 	if (!cpu_has_cpuid_faulting)
@@ -274,16 +275,16 @@ static void __devinit init_intel(struct 
 
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
-		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
 	if (cpuid_edx(0x80000007) & (1u<<8)) {
-		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-		set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
-		set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
+		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+		__set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+		__set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
 	}
 	if ( opt_arat &&
 	     ( c->cpuid_level >= 0x00000006 ) &&
 	     ( cpuid_eax(0x00000006) & (1u<<2) ) )
-		set_bit(X86_FEATURE_ARAT, c->x86_capability);
+		__set_bit(X86_FEATURE_ARAT, c->x86_capability);
 }
 
 static const struct cpu_dev intel_cpu_dev = {



[-- Attachment #2: x86-caps-use-non-atomic-bitops.patch --]
[-- Type: text/plain, Size: 7790 bytes --]

x86/capabilities: set/clear them using non-locked bitops

Their initialization happens without races, so there's no point in
using atomic (locked) operations to update the respective flags.
(There's one case where the clear_bit() was completely pointless.)

Also drop a neighboring stale comment from AMD code.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -872,7 +872,7 @@ static int __init detect_init_APIC (void
         return -1;
     }
 
-    set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+    __set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
     mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
 
     /* The BIOS may have set up the APIC at some other address */
@@ -1369,7 +1369,7 @@ void pmu_apic_interrupt(struct cpu_user_
 int __init APIC_init_uniprocessor (void)
 {
     if (enable_local_apic < 0)
-        clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+        __clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
 
     if (!smp_found_config && !cpu_has_apic) {
         skip_ioapic_setup = 1;
@@ -1382,7 +1382,6 @@ int __init APIC_init_uniprocessor (void)
     if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
         printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
                boot_cpu_physical_apicid);
-        clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
         skip_ioapic_setup = 1;
         return -1;
     }
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -442,15 +442,9 @@ static void __devinit init_amd(struct cp
 		wrmsrl(MSR_K7_HWCR, value);
 	}
 
-	/*
-	 *	FIXME: We should handle the K5 here. Set up the write
-	 *	range and also turn on MSR 83 bits 4 and 31 (write alloc,
-	 *	no bus pipeline)
-	 */
-
 	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
 	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
-	clear_bit(0*32+31, c->x86_capability);
+	__clear_bit(0*32+31, c->x86_capability);
 	
 	if (c->x86 == 0xf && c->x86_model < 0x14
 	    && cpu_has(c, X86_FEATURE_LAHF_LM)) {
@@ -459,7 +453,7 @@ static void __devinit init_amd(struct cp
 		 * revision D (model = 0x14) and later actually support it.
 		 * (AMD Erratum #110, docId: 25759).
 		 */
-		clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability);
+		__clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability);
 		if (!rdmsr_amd_safe(0xc001100d, &l, &h))
 			wrmsr_amd_safe(0xc001100d, l, h & ~1);
 	}
@@ -482,10 +476,11 @@ static void __devinit init_amd(struct cp
 	if (c->extended_cpuid_level >= 0x80000007) {
 		c->x86_power = cpuid_edx(0x80000007);
 		if (c->x86_power & (1<<8)) {
-			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-			set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+			__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+			__set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
 			if (c->x86 != 0x11)
-				set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
+				__set_bit(X86_FEATURE_TSC_RELIABLE,
+					  c->x86_capability);
 		}
 	}
 
@@ -498,7 +493,7 @@ static void __devinit init_amd(struct cp
 		wrmsr_safe(MSR_K8_EXT_FEATURE_MASK, value);
 		rdmsrl(MSR_K8_EXT_FEATURE_MASK, value);
 		if (value & (1ULL << 54)) {
-			set_bit(X86_FEATURE_TOPOEXT, c->x86_capability);
+			__set_bit(X86_FEATURE_TOPOEXT, c->x86_capability);
 			printk(KERN_INFO "CPU: Re-enabling disabled "
 			       "Topology Extensions Support\n");
 		}
@@ -516,7 +511,7 @@ static void __devinit init_amd(struct cp
 
 	/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
 	if (c->x86 >= 0x10 && !force_mwait)
-		clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
+		__clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
 
 	if (!cpu_has_amd_erratum(c, AMD_ERRATUM_121))
 		opt_allow_unsafe = 1;
@@ -566,7 +561,7 @@ static void __devinit init_amd(struct cp
 	}
 
 	/* AMD CPUs do not support SYSENTER outside of legacy mode. */
-	clear_bit(X86_FEATURE_SEP, c->x86_capability);
+	__clear_bit(X86_FEATURE_SEP, c->x86_capability);
 
 	if (c->x86 == 0x10) {
 		/* do this for boot cpu */
@@ -592,7 +587,7 @@ static void __devinit init_amd(struct cp
 	 * running in deep C states.
 	 */
 	if ( opt_arat && c->x86 > 0x11 )
-		set_bit(X86_FEATURE_ARAT, c->x86_capability);
+		__set_bit(X86_FEATURE_ARAT, c->x86_capability);
 
 	/*
 	 * Prior to Family 0x14, perf counters are not reset during warm reboot.
--- a/xen/arch/x86/cpu/centaur.c
+++ b/xen/arch/x86/cpu/centaur.c
@@ -47,7 +47,7 @@ static void __init init_c3(struct cpuinf
 
 	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
-		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
 	}
 
 	get_model_name(c);
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -312,7 +312,7 @@ void __cpuinit identify_cpu(struct cpuin
 
         /* Initialize xsave/xrstor features */
 	if ( !use_xsave )
-		clear_bit(X86_FEATURE_XSAVE, boot_cpu_data.x86_capability);
+		__clear_bit(X86_FEATURE_XSAVE, boot_cpu_data.x86_capability);
 
 	if ( cpu_has_xsave )
 		xstate_init(c);
@@ -392,7 +392,7 @@ void __cpuinit detect_extended_topology(
 	if ( ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE) )
 		return;
 
-	set_bit(X86_FEATURE_XTOPOLOGY, c->x86_capability);
+	__set_bit(X86_FEATURE_XTOPOLOGY, c->x86_capability);
 
 	initial_apicid = edx;
 
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -209,7 +209,7 @@ static void __devinit Intel_errata_worka
 
 	if (c->x86 == 6 && cpu_has_clflush &&
 	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
-		set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
+		__set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
 }
 
 
@@ -244,7 +244,7 @@ static void __devinit init_intel(struct 
 		unsigned eax = cpuid_eax(10);
 		/* Check for version and the number of counters */
 		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
-			set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+			__set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
 	}
 
 	if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
@@ -255,10 +255,11 @@ static void __devinit init_intel(struct 
 
 	if (c == &boot_cpu_data && c->x86 == 6) {
 		if (probe_intel_cpuid_faulting())
-			set_bit(X86_FEATURE_CPUID_FAULTING, c->x86_capability);
+			__set_bit(X86_FEATURE_CPUID_FAULTING,
+				  c->x86_capability);
 	} else if (boot_cpu_has(X86_FEATURE_CPUID_FAULTING)) {
 		BUG_ON(!probe_intel_cpuid_faulting());
-		set_bit(X86_FEATURE_CPUID_FAULTING, c->x86_capability);
+		__set_bit(X86_FEATURE_CPUID_FAULTING, c->x86_capability);
 	}
 
 	if (!cpu_has_cpuid_faulting)
@@ -274,16 +275,16 @@ static void __devinit init_intel(struct 
 
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
-		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
 	if (cpuid_edx(0x80000007) & (1u<<8)) {
-		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-		set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
-		set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
+		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+		__set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+		__set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
 	}
 	if ( opt_arat &&
 	     ( c->cpuid_level >= 0x00000006 ) &&
 	     ( cpuid_eax(0x00000006) & (1u<<2) ) )
-		set_bit(X86_FEATURE_ARAT, c->x86_capability);
+		__set_bit(X86_FEATURE_ARAT, c->x86_capability);
 }
 
 static const struct cpu_dev intel_cpu_dev = {

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] x86: remove unused x87 remnants of 32-bit days
  2015-10-14 13:29 ` [PATCH 1/4] x86: remove unused x87 remnants of 32-bit days Jan Beulich
@ 2015-10-14 13:57   ` Andrew Cooper
  2015-10-14 14:05     ` Andrew Cooper
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2015-10-14 13:57 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 14/10/15 14:29, Jan Beulich wrote:
> x86-64 requires FXSR, XMM, and XMM2, so there's no point in hiding
> respective code behind conditionals.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>, with two
suggestions.

> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1295,10 +1295,8 @@ void __init noreturn __start_xen(unsigne
>  
>      identify_cpu(&boot_cpu_data);
>  
> -    if ( cpu_has_fxsr )
> -        set_in_cr4(X86_CR4_OSFXSR);
> -    if ( cpu_has_xmm )
> -        set_in_cr4(X86_CR4_OSXMMEXCPT);
> +    set_in_cr4(X86_CR4_OSFXSR);
> +    set_in_cr4(X86_CR4_OSXMMEXCPT);

set_in_cr4() takes a mask, so could take both of these at once, to be
slightly more efficient.

If the bsp path gained an unconditional write_cr4(mmu_cr4_features) then
it might be better to fold them into the initialiser for mmu_cr4_features.

> --- a/xen/include/asm-x86/page.h
> +++ b/xen/include/asm-x86/page.h
> @@ -206,13 +206,10 @@ typedef struct { u64 pfn; } pagetable_t;
>  #define pagetable_null()        pagetable_from_pfn(0)
>  
>  void clear_page_sse2(void *);
> -#define clear_page(_p)      (cpu_has_xmm2 ?                             \
> -                             clear_page_sse2((void *)(_p)) :            \
> -                             (void)memset((void *)(_p), 0, PAGE_SIZE))
>  void copy_page_sse2(void *, const void *);
> -#define copy_page(_t,_f)    (cpu_has_xmm2 ?                             \
> -                             copy_page_sse2(_t, _f) :                   \
> -                             (void)memcpy(_t, _f, PAGE_SIZE))
> +
> +#define clear_page(_p)      clear_page_sse2(_p)
> +#define copy_page(_t,_f)    copy_page_sse2(_t, _f)

As you are changing this, an extra space following the comma.

~Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] x86: drop further constant cpu_has_* predicates
  2015-10-14 13:30 ` [PATCH 2/4] x86: drop further constant cpu_has_* predicates Jan Beulich
@ 2015-10-14 14:02   ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2015-10-14 14:02 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 14/10/15 14:30, Jan Beulich wrote:
> EFER and SYSCALL are required on x86-64, and I think there's no point
> in assuming there might be no TSC. A few other predicates are simply
> unused. Of the ones left but constant I'm not convinced we should drop
> them; for some of them we may actually better make them non-constant
> (see also next patch in this series).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] x86: remove unused x87 remnants of 32-bit days
  2015-10-14 13:57   ` Andrew Cooper
@ 2015-10-14 14:05     ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2015-10-14 14:05 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 14/10/15 14:57, Andrew Cooper wrote:
> On 14/10/15 14:29, Jan Beulich wrote:
>> x86-64 requires FXSR, XMM, and XMM2, so there's no point in hiding
>> respective code behind conditionals.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>, with two
> suggestions.
>
>> --- a/xen/arch/x86/setup.c
>> +++ b/xen/arch/x86/setup.c
>> @@ -1295,10 +1295,8 @@ void __init noreturn __start_xen(unsigne
>>  
>>      identify_cpu(&boot_cpu_data);
>>  
>> -    if ( cpu_has_fxsr )
>> -        set_in_cr4(X86_CR4_OSFXSR);
>> -    if ( cpu_has_xmm )
>> -        set_in_cr4(X86_CR4_OSXMMEXCPT);
>> +    set_in_cr4(X86_CR4_OSFXSR);
>> +    set_in_cr4(X86_CR4_OSXMMEXCPT);
> set_in_cr4() takes a mask, so could take both of these at once, to be
> slightly more efficient.
>
> If the bsp path gained an unconditional write_cr4(mmu_cr4_features) then
> it might be better to fold them into the initialiser for mmu_cr4_features.
>
>> --- a/xen/include/asm-x86/page.h
>> +++ b/xen/include/asm-x86/page.h
>> @@ -206,13 +206,10 @@ typedef struct { u64 pfn; } pagetable_t;
>>  #define pagetable_null()        pagetable_from_pfn(0)
>>  
>>  void clear_page_sse2(void *);
>> -#define clear_page(_p)      (cpu_has_xmm2 ?                             \
>> -                             clear_page_sse2((void *)(_p)) :            \
>> -                             (void)memset((void *)(_p), 0, PAGE_SIZE))
>>  void copy_page_sse2(void *, const void *);
>> -#define copy_page(_t,_f)    (cpu_has_xmm2 ?                             \
>> -                             copy_page_sse2(_t, _f) :                   \
>> -                             (void)memcpy(_t, _f, PAGE_SIZE))
>> +
>> +#define clear_page(_p)      clear_page_sse2(_p)
>> +#define copy_page(_t,_f)    copy_page_sse2(_t, _f)
> As you are changing this, an extra space following the comma.
>
> ~Andrew

A third suggestion to delete the trailing whitespace before fninit in
the following hunk.

--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -19,15 +19,12 @@
 
 static void fpu_init(void)
 {
-    unsigned long val;
+    uint32_t val = MXCSR_DEFAULT;
     
     asm volatile ( "fninit" );
-    if ( cpu_has_xmm )
-    {
-        /* load default value into MXCSR control/status register */
-        val = MXCSR_DEFAULT;
-        asm volatile ( "ldmxcsr %0" : : "m" (val) );
-    }
+
+    /* load default value into MXCSR control/status register */
+    asm volatile ( "ldmxcsr %0" : : "m" (val) );
 }
 

~Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] x86: don't use FPU instructions when there is no FPU
  2015-10-14 13:31 ` [PATCH 3/4] x86: don't use FPU instructions when there is no FPU Jan Beulich
@ 2015-10-14 14:30   ` Andrew Cooper
  2015-10-14 14:46     ` Jan Beulich
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2015-10-14 14:30 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 14/10/15 14:31, Jan Beulich wrote:
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -170,6 +170,7 @@
>  #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
>  #define CPUID5_ECX_INTERRUPT_BREAK      0x2
>  
> +#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)

This distinguishes only between an onboard fpu or an original x87
co-processor.  It does not represent the presence or absence of an fpu.

Looking at the Xeon Phi documentation
(http://download-software.intel.com/sites/default/files/forum/278102/327364001en.pdf),
it has this feature bit set, although it does have MMX clear.

Interestingly however, a Phi's default MXCSR value is 0x20000h and will
#GP at any attempt to clear bit 21, which means fpu_init() will
unconditionally blow up on such a system.

I am not aware of a bit which signifies the absence of FPU
capabilities.  Until there is, I don't think changes along these lines
are a safe way to go.

~Andrew

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] x86/capabilities: set/clear them using non-locked bitops
  2015-10-14 13:31 ` [PATCH 4/4] x86/capabilities: set/clear them using non-locked bitops Jan Beulich
@ 2015-10-14 14:36   ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2015-10-14 14:36 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Keir Fraser

On 14/10/15 14:31, Jan Beulich wrote:
> Their initialization happens without races, so there's no point in
> using atomic (locked) operations to update the respective flags.
> (There's one case where the clear_bit() was completely pointless.)
>
> Also drop a neighboring stale comment from AMD code.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] x86: don't use FPU instructions when there is no FPU
  2015-10-14 14:30   ` Andrew Cooper
@ 2015-10-14 14:46     ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2015-10-14 14:46 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser

>>> On 14.10.15 at 16:30, <andrew.cooper3@citrix.com> wrote:
> On 14/10/15 14:31, Jan Beulich wrote:
>> --- a/xen/include/asm-x86/cpufeature.h
>> +++ b/xen/include/asm-x86/cpufeature.h
>> @@ -170,6 +170,7 @@
>>  #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
>>  #define CPUID5_ECX_INTERRUPT_BREAK      0x2
>>  
>> +#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
> 
> This distinguishes only between an onboard fpu or an original x87
> co-processor.  It does not represent the presence or absence of an fpu.

That's a very odd statement with off-chip FPUs having disappeared
with the Pentium. We don't care about x87 co-processors at all in Xen.

> Looking at the Xeon Phi documentation
> (http://download-software.intel.com/sites/default/files/forum/278102/327364001en.pdf),
> it has this feature bit set, although it does have MMX clear.
> 
> Interestingly however, a Phi's default MXCSR value is 0x20000h and will
> #GP at any attempt to clear bit 21, which means fpu_init() will
> unconditionally blow up on such a system.

"... one of the many things ..."

> I am not aware of a bit which signifies the absence of FPU
> capabilities.  Until there is, I don't think changes along these lines
> are a safe way to go.

See above; I continue to think this is the right thing at this point.
Going forward we'll have to see whether co-processors re-appear.

Jan

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-10-14 14:46 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-14 13:22 [PATCH 0/4] x86: capabilities handling adjustments Jan Beulich
2015-10-14 13:29 ` [PATCH 1/4] x86: remove unused x87 remnants of 32-bit days Jan Beulich
2015-10-14 13:57   ` Andrew Cooper
2015-10-14 14:05     ` Andrew Cooper
2015-10-14 13:30 ` [PATCH 2/4] x86: drop further constant cpu_has_* predicates Jan Beulich
2015-10-14 14:02   ` Andrew Cooper
2015-10-14 13:31 ` [PATCH 3/4] x86: don't use FPU instructions when there is no FPU Jan Beulich
2015-10-14 14:30   ` Andrew Cooper
2015-10-14 14:46     ` Jan Beulich
2015-10-14 13:31 ` [PATCH 4/4] x86/capabilities: set/clear them using non-locked bitops Jan Beulich
2015-10-14 14:36   ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.