* [PATCH 1/2] x86: add alternative_io_2/3 to support alternative with 2/3 features.
2016-02-02 7:11 [PATCH 0/2] use alternative asm on xsave side Shuai Ruan
@ 2016-02-02 7:11 ` Shuai Ruan
2016-02-02 7:11 ` [PATCH 2/2] x86/xsave: use alternative asm on xsave side Shuai Ruan
1 sibling, 0 replies; 5+ messages in thread
From: Shuai Ruan @ 2016-02-02 7:11 UTC (permalink / raw)
To: xen-devel; +Cc: andrew.cooper3, keir, jbeulich
Most of the code is ported from Linux, with some changes.
alternative_io_2 replaces old instruction with new instructions
based on two features.
alternative_io_3 replaces old instruction with new instructions
based on three features.
Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
---
xen/include/asm-x86/alternative.h | 65 +++++++++++++++++++++++++++++++++++++++
1 file changed, 65 insertions(+)
diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h
index 7d11354..b018613 100644
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -59,6 +59,39 @@ extern void alternative_instructions(void);
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
".popsection"
+#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+ OLDINSTR(oldinstr) \
+ ".pushsection .altinstructions,\"a\"\n" /* one ALTINSTR_ENTRY per feature/index pair */ \
+ ALTINSTR_ENTRY(feature1, 1) \
+ ALTINSTR_ENTRY(feature2, 2) \
+ ".popsection\n" \
+ ".pushsection .discard,\"aw\",@progbits\n" /* one DISCARD_ENTRY per replacement index */ \
+ DISCARD_ENTRY(1) \
+ DISCARD_ENTRY(2) \
+ ".popsection\n" \
+ ".pushsection .altinstr_replacement, \"ax\"\n" /* replacement bodies, indices 1 and 2 */ \
+ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
+ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+ ".popsection"
+
+#define ALTERNATIVE_3(oldinstr, newinstr1, feature1, newinstr2, feature2, \
+ newinstr3, feature3) \
+ OLDINSTR(oldinstr) \
+ ".pushsection .altinstructions,\"a\"\n" /* one ALTINSTR_ENTRY per feature/index pair */ \
+ ALTINSTR_ENTRY(feature1, 1) \
+ ALTINSTR_ENTRY(feature2, 2) \
+ ALTINSTR_ENTRY(feature3, 3) \
+ ".popsection\n" \
+ ".pushsection .discard,\"aw\",@progbits\n" /* one DISCARD_ENTRY per replacement index */ \
+ DISCARD_ENTRY(1) \
+ DISCARD_ENTRY(2) \
+ DISCARD_ENTRY(3) \
+ ".popsection\n" \
+ ".pushsection .altinstr_replacement, \"ax\"\n" /* replacement bodies, indices 1 to 3 */ \
+ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
+ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+ ALTINSTR_REPLACEMENT(newinstr3, feature3, 3) \
+ ".popsection"
/*
* Alternative instructions for different CPU types or capabilities.
*
@@ -93,6 +126,38 @@ extern void alternative_instructions(void);
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
: output : input)
+/*
+ * Like alternative_io, but with two candidate replacement instructions,
+ * each guarded by its own feature.
+ *
+ * If the CPU has feature2, newinstr2 is used.
+ * Otherwise, if the CPU has feature1, newinstr1 is used.
+ * Otherwise, oldinstr is used.
+ *
+ * output is the asm output operand list (use ASM_OUTPUT2() to pass more
+ * than one); input... is the asm input operand list.
+ */
+
+#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2, \
+ feature2, output, input...) \
+ asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
+ newinstr2, feature2) \
+ : output : input)
+
+/*
+ * Like alternative_io, but with three candidate replacement instructions,
+ * each guarded by its own feature.
+ *
+ * If the CPU has feature3, newinstr3 is used.
+ * Otherwise, if the CPU has feature2, newinstr2 is used.
+ * Otherwise, if the CPU has feature1, newinstr1 is used.
+ * Otherwise, oldinstr is used.
+ *
+ * output is the asm output operand list (use ASM_OUTPUT2() to pass more
+ * than one); input... is the asm input operand list.
+ */
+
+#define alternative_io_3(oldinstr, newinstr1, feature1, newinstr2, \
+ feature2, newinstr3, feature3, output, \
+ input...) \
+ asm volatile(ALTERNATIVE_3(oldinstr, newinstr1, feature1, \
+ newinstr2, feature2, newinstr3, feature3) \
+ : output : input)
+
/* Use this macro(s) if you need more than one output parameter. */
#define ASM_OUTPUT2(a...) a
--
1.9.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] x86/xsave: use alternative asm on xsave side.
2016-02-02 7:11 [PATCH 0/2] use alternative asm on xsave side Shuai Ruan
2016-02-02 7:11 ` [PATCH 1/2] x86: add alternative_io_2/3 to support alternative with 2/3 features Shuai Ruan
@ 2016-02-02 7:11 ` Shuai Ruan
2016-02-03 11:40 ` Jan Beulich
1 sibling, 1 reply; 5+ messages in thread
From: Shuai Ruan @ 2016-02-02 7:11 UTC (permalink / raw)
To: xen-devel; +Cc: andrew.cooper3, keir, jbeulich
This patch uses alternative asm on the xsave side.
As xsaves uses the same modified optimization as xsaveopt, xsaves
may also skip writing the FPU portion of the save image.
So xsaves needs the same extra tweaks as xsaveopt.
Signed-off-by: Shuai Ruan <shuai.ruan@linux.intel.com>
---
xen/arch/x86/xstate.c | 49 ++++++++++++++++---------------------------------
1 file changed, 16 insertions(+), 33 deletions(-)
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 4e87ab3..832f4ad 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -248,24 +248,26 @@ void xsave(struct vcpu *v, uint64_t mask)
uint32_t hmask = mask >> 32;
uint32_t lmask = mask;
int word_size = mask & XSTATE_FP ? (cpu_has_fpu_sel ? 8 : 0) : -1;
+/*
+ * Emit the save instruction for (%rdi), patched at boot to the best
+ * variant the CPU supports. pfx is an optional prefix byte list (e.g.
+ * "0x48," for REX.W) prepended to every encoding. Encodings match the
+ * open-coded cpu_has_* chain this macro replaces.
+ */
+#define XSAVE(pfx) \
+ alternative_io_3(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
+ ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
+ X86_FEATURE_XSAVEOPT, \
+ ".byte " pfx "0x0f,0xc7,0x27\n", /* xsavec */ \
+ X86_FEATURE_XSAVEC, \
+ ".byte " pfx "0x0f,0xc7,0x2f\n", /* xsaves */ \
+ X86_FEATURE_XSAVES, \
+ "=m" (*ptr), \
+ "a" (lmask), "d" (hmask), "D" (ptr))
if ( word_size <= 0 || !is_pv_32bit_vcpu(v) )
{
typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
- if ( cpu_has_xsaves )
- asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
- : "=m" (*ptr)
- : "a" (lmask), "d" (hmask), "D" (ptr) );
- else if ( cpu_has_xsavec )
- asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
- : "=m" (*ptr)
- : "a" (lmask), "d" (hmask), "D" (ptr) );
- else if ( cpu_has_xsaveopt )
+ if ( cpu_has_xsaveopt || cpu_has_xsaves )
{
/*
- * xsaveopt may not write the FPU portion even when the respective
+ * xsaveopt/xsaves may not write the FPU portion even when the respective
* mask bit is set. For the check further down to work we hence
* need to put the save image back into the state that it was in
* right after the previous xsaveopt.
@@ -277,14 +279,9 @@ void xsave(struct vcpu *v, uint64_t mask)
ptr->fpu_sse.fip.sel = 0;
ptr->fpu_sse.fdp.sel = 0;
}
- asm volatile ( ".byte 0x48,0x0f,0xae,0x37"
- : "=m" (*ptr)
- : "a" (lmask), "d" (hmask), "D" (ptr) );
}
- else
- asm volatile ( ".byte 0x48,0x0f,0xae,0x27"
- : "=m" (*ptr)
- : "a" (lmask), "d" (hmask), "D" (ptr) );
+
+ XSAVE("0x48,");
if ( !(mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) ||
/*
@@ -315,24 +312,10 @@ void xsave(struct vcpu *v, uint64_t mask)
}
else
{
- if ( cpu_has_xsaves )
- asm volatile ( ".byte 0x0f,0xc7,0x2f"
- : "=m" (*ptr)
- : "a" (lmask), "d" (hmask), "D" (ptr) );
- else if ( cpu_has_xsavec )
- asm volatile ( ".byte 0x0f,0xc7,0x27"
- : "=m" (*ptr)
- : "a" (lmask), "d" (hmask), "D" (ptr) );
- else if ( cpu_has_xsaveopt )
- asm volatile ( ".byte 0x0f,0xae,0x37"
- : "=m" (*ptr)
- : "a" (lmask), "d" (hmask), "D" (ptr) );
- else
- asm volatile ( ".byte 0x0f,0xae,0x27"
- : "=m" (*ptr)
- : "a" (lmask), "d" (hmask), "D" (ptr) );
+ XSAVE("");
word_size = 4;
}
+#undef XSAVE
if ( word_size >= 0 )
ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = word_size;
}
--
1.9.1
^ permalink raw reply related [flat|nested] 5+ messages in thread