* [PATCH 0/2] use alternative asm on xsave side
@ 2016-02-02  7:11 Shuai Ruan
  2016-02-02  7:11 ` [PATCH 1/2] x86: add alternative_io_2/3 to support alternatives with 2/3 features Shuai Ruan
  2016-02-02  7:11 ` [PATCH 2/2] x86/xsave: use alternative asm on xsave side Shuai Ruan
  0 siblings, 2 replies; 5+ messages in thread
From: Shuai Ruan @ 2016-02-02  7:11 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, keir, jbeulich

This patch series adds alternative_io_2/3 to support alternatives with 2/3 features.
It also uses alternative asm on the xsave side.

Shuai Ruan (2):
  x86: add alternative_io_2/3 to support alternatives with 2/3 features.
  x86/xsave: use alternative asm on xsave side.

 xen/arch/x86/xstate.c             | 49 ++++++++++-------------------
 xen/include/asm-x86/alternative.h | 65 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 33 deletions(-)

-- 
1.9.1


* [PATCH 1/2] x86: add alternative_io_2/3 to support alternatives with 2/3 features.
  2016-02-02  7:11 [PATCH 0/2] use alternative asm on xsave side Shuai Ruan
@ 2016-02-02  7:11 ` Shuai Ruan
  2016-02-02  7:11 ` [PATCH 2/2] x86/xsave: use alternative asm on xsave side Shuai Ruan
  1 sibling, 0 replies; 5+ messages in thread
From: Shuai Ruan @ 2016-02-02  7:11 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, keir, jbeulich

Most of the code is ported from Linux with some changes.
alternative_io_2 replaces the old instruction with new instructions
based on two features.
alternative_io_3 replaces the old instruction with new instructions
based on three features.
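
As a usage illustration (my own sketch, not part of this patch; the
instructions and feature flags are placeholders chosen only to show the
argument layout), alternative_io_2 would be used along these lines:

    /*
     * Sketch: CLFLUSH is emitted by default; at boot the alternatives
     * framework patches in CLFLUSHOPT if that (feature1) bit is set,
     * with CLWB (feature2) taking priority when it is set as well.
     */
    alternative_io_2("clflush (%[p])",
                     "clflushopt (%[p])", X86_FEATURE_CLFLUSHOPT,
                     "clwb (%[p])",       X86_FEATURE_CLWB,
                     "+m" (*line),        /* line is a char * */
                     [p] "r" (line));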

Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
---
 xen/include/asm-x86/alternative.h | 65 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h
index 7d11354..b018613 100644
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -59,6 +59,39 @@ extern void alternative_instructions(void);
         ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
         ".popsection"
 
+#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+        OLDINSTR(oldinstr)                                                \
+        ".pushsection .altinstructions,\"a\"\n"                           \
+        ALTINSTR_ENTRY(feature1, 1)                                       \
+        ALTINSTR_ENTRY(feature2, 2)                                       \
+        ".popsection\n"                                                   \
+        ".pushsection .discard,\"aw\",@progbits\n"                        \
+        DISCARD_ENTRY(1)                                                  \
+        DISCARD_ENTRY(2)                                                  \
+        ".popsection\n"                                                   \
+        ".pushsection .altinstr_replacement, \"ax\"\n"                    \
+        ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                      \
+        ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                      \
+        ".popsection"
+
+#define ALTERNATIVE_3(oldinstr, newinstr1, feature1, newinstr2, feature2, \
+                      newinstr3, feature3)                                \
+        OLDINSTR(oldinstr)                                                \
+        ".pushsection .altinstructions,\"a\"\n"                           \
+        ALTINSTR_ENTRY(feature1, 1)                                       \
+        ALTINSTR_ENTRY(feature2, 2)                                       \
+        ALTINSTR_ENTRY(feature3, 3)                                       \
+        ".popsection\n"                                                   \
+        ".pushsection .discard,\"aw\",@progbits\n"                        \
+        DISCARD_ENTRY(1)                                                  \
+        DISCARD_ENTRY(2)                                                  \
+        DISCARD_ENTRY(3)                                                  \
+        ".popsection\n"                                                   \
+        ".pushsection .altinstr_replacement, \"ax\"\n"                    \
+        ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                      \
+        ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                      \
+        ALTINSTR_REPLACEMENT(newinstr3, feature3, 3)                      \
+        ".popsection"
 /*
  * Alternative instructions for different CPU types or capabilities.
  *
@@ -93,6 +126,38 @@ extern void alternative_instructions(void);
 	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
 		      : output : input)
 
+/*
+ *  This is similar to alternative_io, but it takes two features and
+ *  their respective instructions.
+ *
+ *  If CPU has feature2, newinstr2 is used.
+ *  If CPU has feature1, newinstr1 is used.
+ *  Otherwise, oldinstr is used.
+ */
+
+#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2,       \
+                         feature2, output, input...)                     \
+        asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1,        \
+                     newinstr2, feature2)                                \
+                     : output : input)
+
+/*
+ *  This is similar to alternative_io, but it takes three features and
+ *  their respective instructions.
+ *
+ *  If CPU has feature3, newinstr3 is used.
+ *  If CPU has feature2, newinstr2 is used.
+ *  If CPU has feature1, newinstr1 is used.
+ *  Otherwise, oldinstr is used.
+ */
+
+#define alternative_io_3(oldinstr, newinstr1, feature1, newinstr2,       \
+                         feature2, newinstr3, feature3, output,          \
+                         input...)                                       \
+        asm volatile(ALTERNATIVE_3(oldinstr, newinstr1, feature1,        \
+                     newinstr2, feature2, newinstr3, feature3)           \
+                     : output : input)
+
 /* Use this macro(s) if you need more than one output parameter. */
 #define ASM_OUTPUT2(a...) a
 
-- 
1.9.1


* [PATCH 2/2] x86/xsave: use alternative asm on xsave side.
  2016-02-02  7:11 [PATCH 0/2] use alternative asm on xsave side Shuai Ruan
  2016-02-02  7:11 ` [PATCH 1/2] x86: add alternative_io_2/3 to support alternatives with 2/3 features Shuai Ruan
@ 2016-02-02  7:11 ` Shuai Ruan
  2016-02-03 11:40   ` Jan Beulich
  1 sibling, 1 reply; 5+ messages in thread
From: Shuai Ruan @ 2016-02-02  7:11 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, keir, jbeulich

This patch uses alternative asm on the xsave side.
As xsaves uses the modified optimization like xsaveopt, xsaves
may also not write the FPU portion of the save image.
So xsaves needs the same extra tweaks as xsaveopt.
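
For readability, here is a mnemonic-form sketch of the 64-bit call site
added below (my own rendering, assuming an assembler that knows these
mnemonics; the patch itself sticks to .byte encodings so that older
assemblers keep working):

    /*
     * Sketch only: mnemonic equivalent of the XSAVE("0x48,") use below.
     * The alternatives framework patches in the variant for the
     * highest-priority feature the CPU has (XSAVES, then XSAVEC, then
     * XSAVEOPT), falling back to plain xsave otherwise.
     */
    alternative_io_3("xsave64 (%%rdi)",
                     "xsaveopt64 (%%rdi)", X86_FEATURE_XSAVEOPT,
                     "xsavec64 (%%rdi)",   X86_FEATURE_XSAVEC,
                     "xsaves64 (%%rdi)",   X86_FEATURE_XSAVES,
                     "=m" (*ptr),
                     "a" (lmask), "d" (hmask), "D" (ptr));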

Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
---
 xen/arch/x86/xstate.c | 49 ++++++++++++++++---------------------------------
 1 file changed, 16 insertions(+), 33 deletions(-)

diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 4e87ab3..832f4ad 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -248,24 +248,26 @@ void xsave(struct vcpu *v, uint64_t mask)
     uint32_t hmask = mask >> 32;
     uint32_t lmask = mask;
     int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
+#define XSAVE(pfx) \
+        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
+                         ".byte " pfx "0x0f,0xae,0x37\n", \
+                         X86_FEATURE_XSAVEOPT, \
+                         ".byte " pfx "0x0f,0xc7,0x27\n", \
+                         X86_FEATURE_XSAVEC, \
+                         ".byte " pfx "0x0f,0xc7,0x37\n", \
+                         X86_FEATURE_XSAVES, \
+                         "=m" (*ptr), \
+                         "a" (lmask), "d" (hmask), "D" (ptr))
 
     if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
     {
         typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
         typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
 
-        if ( cpu_has_xsaves )
-            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsavec )
-            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsaveopt )
+        if ( cpu_has_xsaveopt || cpu_has_xsaves )
         {
             /*
-             * xsaveopt may not write the FPU portion even when the respective
+             * xsaveopt/xsaves may not write the FPU portion even when the respective
              * mask bit is set. For the check further down to work we hence
              * need to put the save image back into the state that it was in
              * right after the previous xsaveopt.
@@ -277,14 +279,9 @@ void xsave(struct vcpu *v, uint64_t mask)
                 ptr->fpu_sse.fip.sel = 0;
                 ptr->fpu_sse.fdp.sel = 0;
             }
-            asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
         }
-        else
-            asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+
+        XSAVE("0x48,");
 
         if ( !(mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) ||
              /*
@@ -315,24 +312,10 @@ void xsave(struct vcpu *v, uint64_t mask)
     }
     else
     {
-        if ( cpu_has_xsaves )
-            asm volatile ( ".byte 0x0f,0xc7,0x2f"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsavec )
-            asm volatile ( ".byte 0x0f,0xc7,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else if ( cpu_has_xsaveopt )
-            asm volatile ( ".byte 0x0f,0xae,0x37"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
-        else
-            asm volatile ( ".byte 0x0f,0xae,0x27"
-                           : "=m" (*ptr)
-                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        XSAVE("");
         word_size = 4;
     }
+#undef XSAVE
     if ( word_size >= 0 )
         ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
 }
-- 
1.9.1


* Re: [PATCH 2/2] x86/xsave: use alternative asm on xsave side.
  2016-02-02  7:11 ` [PATCH 2/2] x86/xsave: use alternative asm on xsave side Shuai Ruan
@ 2016-02-03 11:40   ` Jan Beulich
  2016-02-03 12:08     ` Jan Beulich
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2016-02-03 11:40 UTC (permalink / raw)
  To: Shuai Ruan; +Cc: andrew.cooper3, keir, xen-devel

>>> On 02.02.16 at 08:11, <shuai.ruan@linux.intel.com> wrote:
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -248,24 +248,26 @@ void xsave(struct vcpu *v, uint64_t mask)
>      uint32_t hmask = mask >> 32;
>      uint32_t lmask = mask;
>      int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
> +#define XSAVE(pfx) \
> +        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
> +                         ".byte " pfx "0x0f,0xae,0x37\n", \
> +                         X86_FEATURE_XSAVEOPT, \
> +                         ".byte " pfx "0x0f,0xc7,0x27\n", \
> +                         X86_FEATURE_XSAVEC, \
> +                         ".byte " pfx "0x0f,0xc7,0x37\n", \
> +                         X86_FEATURE_XSAVES, \
> +                         "=m" (*ptr), \
> +                         "a" (lmask), "d" (hmask), "D" (ptr))
>  
>      if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
>      {
>          typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
>          typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
>  
> -        if ( cpu_has_xsaves )
> -            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
> -                           : "=m" (*ptr)
> -                           : "a" (lmask), "d" (hmask), "D" (ptr) );
> -        else if ( cpu_has_xsavec )
> -            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
> -                           : "=m" (*ptr)
> -                           : "a" (lmask), "d" (hmask), "D" (ptr) );
> -        else if ( cpu_has_xsaveopt )
> +        if ( cpu_has_xsaveopt || cpu_has_xsaves )
>          {
>              /*
> -             * xsaveopt may not write the FPU portion even when the respective
> +             * xsaveopt/xsaves may not write the FPU portion even when the respective

Apart from this line now being too long and hence the entire
comment needing re-formatting:
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan


* Re: [PATCH 2/2] x86/xsave: use alternative asm on xsave side.
  2016-02-03 11:40   ` Jan Beulich
@ 2016-02-03 12:08     ` Jan Beulich
  0 siblings, 0 replies; 5+ messages in thread
From: Jan Beulich @ 2016-02-03 12:08 UTC (permalink / raw)
  To: Shuai Ruan; +Cc: andrew.cooper3, keir, xen-devel

>>> On 03.02.16 at 12:40, <JBeulich@suse.com> wrote:
>>>> On 02.02.16 at 08:11, <shuai.ruan@linux.intel.com> wrote:
>> --- a/xen/arch/x86/xstate.c
>> +++ b/xen/arch/x86/xstate.c
>> @@ -248,24 +248,26 @@ void xsave(struct vcpu *v, uint64_t mask)
>>      uint32_t hmask = mask >> 32;
>>      uint32_t lmask = mask;
>>      int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
>> +#define XSAVE(pfx) \
>> +        alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", \
>> +                         ".byte " pfx "0x0f,0xae,0x37\n", \
>> +                         X86_FEATURE_XSAVEOPT, \
>> +                         ".byte " pfx "0x0f,0xc7,0x27\n", \
>> +                         X86_FEATURE_XSAVEC, \
>> +                         ".byte " pfx "0x0f,0xc7,0x37\n", \
>> +                         X86_FEATURE_XSAVES, \
>> +                         "=m" (*ptr), \
>> +                         "a" (lmask), "d" (hmask), "D" (ptr))
>>  
>>      if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
>>      {
>>          typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
>>          typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
>>  
>> -        if ( cpu_has_xsaves )
>> -            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
>> -                           : "=m" (*ptr)
>> -                           : "a" (lmask), "d" (hmask), "D" (ptr) );
>> -        else if ( cpu_has_xsavec )
>> -            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
>> -                           : "=m" (*ptr)
>> -                           : "a" (lmask), "d" (hmask), "D" (ptr) );
>> -        else if ( cpu_has_xsaveopt )
>> +        if ( cpu_has_xsaveopt || cpu_has_xsaves )
>>          {
>>              /*
>> -             * xsaveopt may not write the FPU portion even when the 
> respective
>> +             * xsaveopt/xsaves may not write the FPU portion even when the 
> respective
> 
> Apart from this line now being too long and hence the entire
> comment needing re-formatting
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

Withdrawn. There's a bug here, and since I'm re-doing patch 1
from scratch I'll send out the result later.

Jan


