All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] x86: NOP improvements
@ 2017-12-18  9:27 Jan Beulich
  2017-12-18 10:13 ` [PATCH 1/2] x86: improve NOP use for AMD CPUs Jan Beulich
  2017-12-18 10:13 ` [PATCH 2/2] x86: introduce NOP9 forms Jan Beulich
  0 siblings, 2 replies; 7+ messages in thread
From: Jan Beulich @ 2017-12-18  9:27 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

1: improve NOP use for AMD CPUs
2: introduce NOP9 forms

Signed-off-by: Jan Beulich <jbeulich@suse.com>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] x86: improve NOP use for AMD CPUs
  2017-12-18  9:27 [PATCH 0/2] x86: NOP improvements Jan Beulich
@ 2017-12-18 10:13 ` Jan Beulich
  2017-12-18 10:58   ` Andrew Cooper
  2017-12-18 10:13 ` [PATCH 2/2] x86: introduce NOP9 forms Jan Beulich
  1 sibling, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2017-12-18 10:13 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

For Fam10 and later AMD recommends using the "long" NOP forms. Re-write
the present Intel code into switch() statements and add AMD logic. This
at the same time brings us in line again with current Linux.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -85,19 +85,34 @@ static int __init mask_nmi_callback(cons
 
 static void __init arch_init_ideal_nops(void)
 {
-    /*
-     * Due to a decoder implementation quirk, some
-     * specific Intel CPUs actually perform better with
-     * the "k8_nops" than with the SDM-recommended NOPs.
-     */
-    if ( (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-         !(boot_cpu_data.x86 == 6 &&
-           boot_cpu_data.x86_model >= 0x0f &&
-           boot_cpu_data.x86_model != 0x1c &&
-           boot_cpu_data.x86_model != 0x26 &&
-           boot_cpu_data.x86_model != 0x27 &&
-           boot_cpu_data.x86_model < 0x30) )
-        ideal_nops = p6_nops;
+    switch ( boot_cpu_data.x86_vendor )
+    {
+    case X86_VENDOR_INTEL:
+        /*
+         * Due to a decoder implementation quirk, some specific Intel CPUs
+         * actually perform better with the "k8_nops" than with the SDM-
+         * recommended NOPs.
+         */
+        if ( boot_cpu_data.x86 != 6 )
+            ideal_nops = p6_nops;
+        else
+            switch ( boot_cpu_data.x86_model )
+            {
+            case 0x0f ... 0x1b:
+            case 0x1d ... 0x25:
+            case 0x28 ... 0x2f:
+                break;
+            default:
+                ideal_nops = p6_nops;
+                break;
+            }
+        break;
+
+    case X86_VENDOR_AMD:
+        if ( boot_cpu_data.x86 > 0xf )
+            ideal_nops = p6_nops;
+        break;
+    }
 }
 
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] x86: introduce NOP9 forms
  2017-12-18  9:27 [PATCH 0/2] x86: NOP improvements Jan Beulich
  2017-12-18 10:13 ` [PATCH 1/2] x86: improve NOP use for AMD CPUs Jan Beulich
@ 2017-12-18 10:13 ` Jan Beulich
  2017-12-18 11:23   ` Andrew Cooper
  1 sibling, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2017-12-18 10:13 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

Both Intel and AMD recommend an operand-size-override-prefixed long NOP
form for covering 9 bytes, so introduce this and use it in p6_nops[] to
allow further reducing the number of NOPs needed when covering larger
ranges.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -37,7 +37,8 @@ static const unsigned char k8nops[] init
     K8_NOP5,
     K8_NOP6,
     K8_NOP7,
-    K8_NOP8
+    K8_NOP8,
+    K8_NOP9,
 };
 static const unsigned char * const k8_nops[ASM_NOP_MAX+1] init_or_livepatch_constrel = {
     NULL,
@@ -48,7 +49,8 @@ static const unsigned char * const k8_no
     k8nops + 1 + 2 + 3 + 4,
     k8nops + 1 + 2 + 3 + 4 + 5,
     k8nops + 1 + 2 + 3 + 4 + 5 + 6,
-    k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7
+    k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+    k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
@@ -61,7 +63,8 @@ static const unsigned char p6nops[] init
     P6_NOP5,
     P6_NOP6,
     P6_NOP7,
-    P6_NOP8
+    P6_NOP8,
+    P6_NOP9,
 };
 static const unsigned char * const p6_nops[ASM_NOP_MAX+1] init_or_livepatch_constrel = {
     NULL,
@@ -72,7 +75,8 @@ static const unsigned char * const p6_no
     p6nops + 1 + 2 + 3 + 4,
     p6nops + 1 + 2 + 3 + 4 + 5,
     p6nops + 1 + 2 + 3 + 4 + 5 + 6,
-    p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7
+    p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+    p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
--- a/xen/include/asm-x86/nops.h
+++ b/xen/include/asm-x86/nops.h
@@ -22,6 +22,7 @@
 #define K8_NOP6 K8_NOP3,K8_NOP3
 #define K8_NOP7 K8_NOP4,K8_NOP3
 #define K8_NOP8 K8_NOP4,K8_NOP4
+#define K8_NOP9 K8_NOP3,K8_NOP3,K8_NOP3
 
 /*
  * P6 nops
@@ -34,6 +35,7 @@
  * 6: osp nopl 0x00(%eax,%eax,1)
  * 7: nopl 0x00000000(%eax)
  * 8: nopl 0x00000000(%eax,%eax,1)
+ * 9: nopw 0x00000000(%eax,%eax,1)
  *    Note: All the above are assumed to be a single instruction.
  *          There is kernel code that depends on this.
  */
@@ -45,6 +47,7 @@
 #define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0
 #define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0
 #define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0
+#define P6_NOP9 0x66,0x0f,0x1f,0x84,0x00,0,0,0,0
 
 #ifdef __ASSEMBLY__
 #define _ASM_MK_NOP(x) .byte x
@@ -60,7 +63,8 @@
 #define ASM_NOP6 _ASM_MK_NOP(K8_NOP6)
 #define ASM_NOP7 _ASM_MK_NOP(K8_NOP7)
 #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
+#define ASM_NOP9 _ASM_MK_NOP(K8_NOP9)
 
-#define ASM_NOP_MAX 8
+#define ASM_NOP_MAX 9
 
 #endif /* __X86_ASM_NOPS_H__ */




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] x86: improve NOP use for AMD CPUs
  2017-12-18 10:13 ` [PATCH 1/2] x86: improve NOP use for AMD CPUs Jan Beulich
@ 2017-12-18 10:58   ` Andrew Cooper
  2017-12-18 16:50     ` [PATCH v2 " Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Cooper @ 2017-12-18 10:58 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 18/12/17 10:13, Jan Beulich wrote:
> For Fam10 and later AMD recommends using the "long" NOP forms. Re-write
> the present Intel code into switch() statements and add AMD logic. This
> at the same time brings us in line again with current Linux.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Overall, the logic looks to be ok.  However, it would be better for us
to default to p6_nops and switch back to k8_nops only when we encounter
the processors that need them.

~Andrew

>
> --- a/xen/arch/x86/alternative.c
> +++ b/xen/arch/x86/alternative.c
> @@ -85,19 +85,34 @@ static int __init mask_nmi_callback(cons
>  
>  static void __init arch_init_ideal_nops(void)
>  {
> -    /*
> -     * Due to a decoder implementation quirk, some
> -     * specific Intel CPUs actually perform better with
> -     * the "k8_nops" than with the SDM-recommended NOPs.
> -     */
> -    if ( (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
> -         !(boot_cpu_data.x86 == 6 &&
> -           boot_cpu_data.x86_model >= 0x0f &&
> -           boot_cpu_data.x86_model != 0x1c &&
> -           boot_cpu_data.x86_model != 0x26 &&
> -           boot_cpu_data.x86_model != 0x27 &&
> -           boot_cpu_data.x86_model < 0x30) )
> -        ideal_nops = p6_nops;
> +    switch ( boot_cpu_data.x86_vendor )
> +    {
> +    case X86_VENDOR_INTEL:
> +        /*
> +         * Due to a decoder implementation quirk, some specific Intel CPUs
> +         * actually perform better with the "k8_nops" than with the SDM-
> +         * recommended NOPs.
> +         */
> +        if ( boot_cpu_data.x86 != 6 )
> +            ideal_nops = p6_nops;
> +        else
> +            switch ( boot_cpu_data.x86_model )
> +            {
> +            case 0x0f ... 0x1b:
> +            case 0x1d ... 0x25:
> +            case 0x28 ... 0x2f:
> +                break;
> +            default:
> +                ideal_nops = p6_nops;
> +                break;
> +            }
> +        break;
> +
> +    case X86_VENDOR_AMD:
> +        if ( boot_cpu_data.x86 > 0xf )
> +            ideal_nops = p6_nops;
> +        break;
> +    }
>  }
>  
>  /* Use this to add nops to a buffer, then text_poke the whole buffer. */
>
>
>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] x86: introduce NOP9 forms
  2017-12-18 10:13 ` [PATCH 2/2] x86: introduce NOP9 forms Jan Beulich
@ 2017-12-18 11:23   ` Andrew Cooper
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Cooper @ 2017-12-18 11:23 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 18/12/17 10:13, Jan Beulich wrote:
> Both Intel and AMD recommend an operand-size-override-prefixed long NOP
> form for covering 9 bytes, so introduce this and use it in p6_nops[] to
> allow further reducing the number of NOPs needed when covering larger
> ranges.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 1/2] x86: improve NOP use for AMD CPUs
  2017-12-18 10:58   ` Andrew Cooper
@ 2017-12-18 16:50     ` Jan Beulich
  2017-12-18 16:51       ` Andrew Cooper
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2017-12-18 16:50 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

For Fam10 and later AMD recommends using the "long" NOP forms. Re-write
the present Intel code into switch() statements and add AMD logic.

Default to "long" forms (which all 64-bit CPUs are supposed to
recognize), overriding to the K8 flavor on those few (older) CPUs.

This at the same time brings us in line again in this regard with
current Linux.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Invert sense: default to p6_nops and override to k8_nops where needed.

--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -76,7 +76,7 @@ static const unsigned char * const p6_no
 };
 #endif
 
-static const unsigned char * const *ideal_nops init_or_livepatch_data = k8_nops;
+static const unsigned char * const *ideal_nops init_or_livepatch_data = p6_nops;
 
 static int __init mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
 {
@@ -85,19 +85,32 @@ static int __init mask_nmi_callback(cons
 
 static void __init arch_init_ideal_nops(void)
 {
-    /*
-     * Due to a decoder implementation quirk, some
-     * specific Intel CPUs actually perform better with
-     * the "k8_nops" than with the SDM-recommended NOPs.
-     */
-    if ( (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-         !(boot_cpu_data.x86 == 6 &&
-           boot_cpu_data.x86_model >= 0x0f &&
-           boot_cpu_data.x86_model != 0x1c &&
-           boot_cpu_data.x86_model != 0x26 &&
-           boot_cpu_data.x86_model != 0x27 &&
-           boot_cpu_data.x86_model < 0x30) )
-        ideal_nops = p6_nops;
+    switch ( boot_cpu_data.x86_vendor )
+    {
+    case X86_VENDOR_INTEL:
+        /*
+         * Due to a decoder implementation quirk, some specific Intel CPUs
+         * actually perform better with the "k8_nops" than with the SDM-
+         * recommended NOPs.
+         */
+        if ( boot_cpu_data.x86 != 6 )
+            break;
+
+        switch ( boot_cpu_data.x86_model )
+        {
+        case 0x0f ... 0x1b:
+        case 0x1d ... 0x25:
+        case 0x28 ... 0x2f:
+            ideal_nops = k8_nops;
+            break;
+        }
+        break;
+
+    case X86_VENDOR_AMD:
+        if ( boot_cpu_data.x86 <= 0xf )
+            ideal_nops = k8_nops;
+        break;
+    }
 }
 
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] x86: improve NOP use for AMD CPUs
  2017-12-18 16:50     ` [PATCH v2 " Jan Beulich
@ 2017-12-18 16:51       ` Andrew Cooper
  0 siblings, 0 replies; 7+ messages in thread
From: Andrew Cooper @ 2017-12-18 16:51 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 18/12/17 16:50, Jan Beulich wrote:
> For Fam10 and later AMD recommends using the "long" NOP forms. Re-write
> the present Intel code into switch() statements and add AMD logic.
>
> Default to "long" forms (which all 64-bit CPUs are supposed to
> recognize), overriding to the K8 flavor on those few (older) CPUs.
>
> This at the same time brings us in line again in this regard with
> current Linux.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-12-18 16:51 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-18  9:27 [PATCH 0/2] x86: NOP improvements Jan Beulich
2017-12-18 10:13 ` [PATCH 1/2] x86: improve NOP use for AMD CPUs Jan Beulich
2017-12-18 10:58   ` Andrew Cooper
2017-12-18 16:50     ` [PATCH v2 " Jan Beulich
2017-12-18 16:51       ` Andrew Cooper
2017-12-18 10:13 ` [PATCH 2/2] x86: introduce NOP9 forms Jan Beulich
2017-12-18 11:23   ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.