* [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
@ 2017-02-15 14:22 Andrew Cooper
2017-02-15 14:25 ` Andrew Cooper
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Andrew Cooper @ 2017-02-15 14:22 UTC (permalink / raw)
To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich
bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
result in forward declarations of the bitops.h contents. Move ASM_FLAG_OUT()
to a new asm/compiler.h to compensate.
While making changes, switch bool_t to bool and use named asm parameters.
No (intended) functional change. Without GCC flags, the disassembly is
identical before and after this patch. With GCC flags however, this patch
seems to cause GCC to make instruction scheduling decisions.
The first place with significant changes in the disassembly is the the middle
of do_domctl(), which switches from:
lock decl 0x1d8(%rax)
sete %r14b
mov $0xffffffffffffffea,%rbx
test %r14b,%r14b
je ffff82d0801034d4
to:
lock decl 0x1d8(%rax)
mov $0x0,%r14d
mov $0xffffffffffffffea,%rbx
jne ffff82d0801034d4
avoiding the use of %r14d as an intermediate for calculating the conditional
jump, freeing it up for later use. As a result, the compiled code is a little
more efficient and smaller overall.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
I have confirmed using:
diff --git a/xen/include/asm-x86/atomic.h b/xen/include/asm-x86/atomic.h
index ef7e70b..de79443 100644
--- a/xen/include/asm-x86/atomic.h
+++ b/xen/include/asm-x86/atomic.h
@@ -197,7 +197,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
#ifdef __GCC_ASM_FLAG_OUTPUTS__
asm volatile ( "lock; decl %0"
- : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
+ : "+m" (*(volatile int *)&v->counter), "=@ccnz" (c)
:: "memory" );
#else
asm volatile ( "lock; decl %0; setz %1"
that compiler explicitly chose to insert the sete instruction despite the
"=@ccz" condition. The resulting hypervisor does work perfectly well, so I
guess this will be down to some internal details of GCC's code generation.
---
xen/include/asm-x86/asm_defns.h | 6 ---
xen/include/asm-x86/atomic.h | 64 ++++++++++++-------------------
xen/include/asm-x86/bitops.h | 85 ++++++++++++++---------------------------
xen/include/asm-x86/compiler.h | 20 ++++++++++
xen/include/asm-x86/config.h | 1 +
5 files changed, 73 insertions(+), 103 deletions(-)
create mode 100644 xen/include/asm-x86/compiler.h
diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 220ae2e..f1c6fa1 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -413,10 +413,4 @@ static always_inline void stac(void)
#define REX64_PREFIX "rex64/"
#endif
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define ASM_FLAG_OUT(yes, no) yes
-#else
-# define ASM_FLAG_OUT(yes, no) no
-#endif
-
#endif /* __X86_ASM_DEFNS_H__ */
diff --git a/xen/include/asm-x86/atomic.h b/xen/include/asm-x86/atomic.h
index ef7e70b..d2a311c 100644
--- a/xen/include/asm-x86/atomic.h
+++ b/xen/include/asm-x86/atomic.h
@@ -133,17 +133,13 @@ static inline int atomic_sub_return(int i, atomic_t *v)
static inline int atomic_sub_and_test(int i, atomic_t *v)
{
- bool_t c;
-
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; subl %2,%0"
- : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
- : "ir" (i) : "memory" );
-#else
- asm volatile ( "lock; subl %2,%0; setz %1"
- : "+m" (*(volatile int *)&v->counter), "=qm" (c)
- : "ir" (i) : "memory" );
-#endif
+ bool c;
+
+ asm volatile ( "lock; subl %[i], %[counter]\n\t"
+ ASM_FLAG_OUT(, "setz %[zf]\n\t")
+ : [counter] "+m" (*(volatile int *)&v->counter),
+ [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
+ : [i] "ir" (i) : "memory" );
return c;
}
@@ -163,17 +159,13 @@ static inline int atomic_inc_return(atomic_t *v)
static inline int atomic_inc_and_test(atomic_t *v)
{
- bool_t c;
+ bool c;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; incl %0"
- : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
- :: "memory" );
-#else
- asm volatile ( "lock; incl %0; setz %1"
- : "+m" (*(volatile int *)&v->counter), "=qm" (c)
+ asm volatile ( "lock; incl %[counter]\n\t"
+ ASM_FLAG_OUT(, "setz %[zf]\n\t")
+ : [counter] "+m" (*(volatile int *)&v->counter),
+ [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
:: "memory" );
-#endif
return c;
}
@@ -193,34 +185,26 @@ static inline int atomic_dec_return(atomic_t *v)
static inline int atomic_dec_and_test(atomic_t *v)
{
- bool_t c;
+ bool c;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; decl %0"
- : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
+ asm volatile ( "lock; decl %[counter]\n\t"
+ ASM_FLAG_OUT(, "setz %[zf]\n\t")
+ : [counter] "+m" (*(volatile int *)&v->counter),
+ [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
:: "memory" );
-#else
- asm volatile ( "lock; decl %0; setz %1"
- : "+m" (*(volatile int *)&v->counter), "=qm" (c)
- :: "memory" );
-#endif
return c;
}
static inline int atomic_add_negative(int i, atomic_t *v)
{
- bool_t c;
-
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; addl %2,%0"
- : "+m" (*(volatile int *)&v->counter), "=@ccs" (c)
- : "ir" (i) : "memory" );
-#else
- asm volatile ( "lock; addl %2,%0; sets %1"
- : "+m" (*(volatile int *)&v->counter), "=qm" (c)
- : "ir" (i) : "memory" );
-#endif
+ bool c;
+
+ asm volatile ( "lock; addl %[i], %[counter]\n\t"
+ ASM_FLAG_OUT(, "sets %[sf]\n\t")
+ : [counter] "+m" (*(volatile int *)&v->counter),
+ [sf] ASM_FLAG_OUT("=@ccs", "=qm") (c)
+ : [i] "ir" (i) : "memory" );
return c;
}
diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h
index 0f18645..440abb7 100644
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -144,13 +144,10 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; btsl %2,%1"
- : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "lock; btsl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "lock; btsl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -172,15 +169,10 @@ static inline int __test_and_set_bit(int nr, void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "btsl %2,%1"
- : "=@ccc" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "btsl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "btsl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -201,13 +193,10 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; btrl %2,%1"
- : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "lock; btrl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "lock; btrl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -229,15 +218,10 @@ static inline int __test_and_clear_bit(int nr, void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "btrl %2,%1"
- : "=@ccc" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "btrl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "btrl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -251,15 +235,10 @@ static inline int __test_and_change_bit(int nr, void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "btcl %2,%1"
- : "=@ccc" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "btcl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "btcl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -280,13 +259,10 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; btcl %2,%1"
- : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "lock; btcl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "lock; btcl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -305,15 +281,10 @@ static inline int variable_test_bit(int nr, const volatile void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "btl %2,%1"
- : "=@ccc" (oldbit)
- : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
-#else
- asm volatile ( "btl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit)
- : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "btl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit)
+ : [addr] "m" (CONST_ADDR), [nr] "Ir" (nr) : "memory" );
return oldbit;
}
diff --git a/xen/include/asm-x86/compiler.h b/xen/include/asm-x86/compiler.h
new file mode 100644
index 0000000..1fd6a8d
--- /dev/null
+++ b/xen/include/asm-x86/compiler.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_X86_COMPILER_H__
+#define __ASM_X86_COMPILER_H__
+
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define ASM_FLAG_OUT(yes, no) yes
+#else
+# define ASM_FLAG_OUT(yes, no) no
+#endif
+
+#endif /* __ASM_X86_COMPILER_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 6fd84e7..b48a0de 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -107,6 +107,7 @@ extern unsigned char boot_edid_info[128];
#define asmlinkage
#include <xen/const.h>
+#include <asm/compiler.h>
#define PML4_ENTRY_BITS 39
#define PML4_ENTRY_BYTES (_AC(1,UL) << PML4_ENTRY_BITS)
--
2.1.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
2017-02-15 14:22 [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs Andrew Cooper
@ 2017-02-15 14:25 ` Andrew Cooper
2017-02-15 14:55 ` Jan Beulich
2017-02-15 16:53 ` Andrew Cooper
2 siblings, 0 replies; 8+ messages in thread
From: Andrew Cooper @ 2017-02-15 14:25 UTC (permalink / raw)
To: Xen-devel; +Cc: Jan Beulich
On 15/02/17 14:22, Andrew Cooper wrote:
> bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
> result in forward declarations of the bitops.h contents. Move ASM_FLAG_OUT()
> to a new asm/compiler.h to compensate.
>
> While making changes, switch bool_t to bool and use named asm parameters.
>
> No (intended) functional change. Without GCC flags, the disassembly is
> identical before and after this patch. With GCC flags however, this patch
> seems to cause GCC to make instruction scheduling decisions.
This is supposed to read "make better instruction...".
~Andrew
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
2017-02-15 14:22 [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs Andrew Cooper
2017-02-15 14:25 ` Andrew Cooper
@ 2017-02-15 14:55 ` Jan Beulich
2017-02-15 15:13 ` Jan Beulich
2017-02-15 16:00 ` Andrew Cooper
2017-02-15 16:53 ` Andrew Cooper
2 siblings, 2 replies; 8+ messages in thread
From: Jan Beulich @ 2017-02-15 14:55 UTC (permalink / raw)
To: Andrew Cooper; +Cc: Xen-devel
>>> On 15.02.17 at 15:22, <andrew.cooper3@citrix.com> wrote:
> bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
> result in forward declarations of the bitops.h contents. Move ASM_FLAG_OUT()
> to a new asm/compiler.h to compensate.
>
> While making changes, switch bool_t to bool and use named asm parameters.
>
> No (intended) functional change. Without GCC flags, the disassembly is
> identical before and after this patch. With GCC flags however, this patch
> seems to cause GCC to make instruction scheduling decisions.
>
> The first place with significant changes in the disassembly is the the middle
> of do_domctl(), which switches from:
>
> lock decl 0x1d8(%rax)
> sete %r14b
> mov $0xffffffffffffffea,%rbx
> test %r14b,%r14b
> je ffff82d0801034d4
>
> to:
>
> lock decl 0x1d8(%rax)
> mov $0x0,%r14d
> mov $0xffffffffffffffea,%rbx
> jne ffff82d0801034d4
>
> avoiding the use of %r14d as an intermediate for calculating the conditional
> jump, freeing it up for later use. As a result, the compiled code is a little
> more efficient and smaller overall.
I have to admit that I find this rather surprising, especially since
(leaving the naming of the asm() operands aside) the preprocessed
source should not change afaict. It being unexpected I think calls
for better understanding what's going on here.
> --- a/xen/include/asm-x86/config.h
> +++ b/xen/include/asm-x86/config.h
> @@ -107,6 +107,7 @@ extern unsigned char boot_edid_info[128];
> #define asmlinkage
>
> #include <xen/const.h>
> +#include <asm/compiler.h>
Why here? You need it in bitops.h and maybe (it's not really used
there) asm_defns.h only for now.
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
2017-02-15 14:55 ` Jan Beulich
@ 2017-02-15 15:13 ` Jan Beulich
2017-02-15 16:00 ` Andrew Cooper
1 sibling, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2017-02-15 15:13 UTC (permalink / raw)
To: Andrew Cooper; +Cc: Xen-devel
>>> On 15.02.17 at 15:55, <JBeulich@suse.com> wrote:
>>>> On 15.02.17 at 15:22, <andrew.cooper3@citrix.com> wrote:
>> bitops.h cannot include asm_defns.h, because the static inlines in
> cpumasks.h
>> result in forward declarations of the bitops.h contents. Move
> ASM_FLAG_OUT()
>> to a new asm/compiler.h to compensate.
>>
>> While making changes, switch bool_t to bool and use named asm parameters.
>>
>> No (intended) functional change. Without GCC flags, the disassembly is
>> identical before and after this patch. With GCC flags however, this patch
>> seems to cause GCC to make instruction scheduling decisions.
>>
>> The first place with significant changes in the disassembly is the the
> middle
>> of do_domctl(), which switches from:
>>
>> lock decl 0x1d8(%rax)
>> sete %r14b
>> mov $0xffffffffffffffea,%rbx
>> test %r14b,%r14b
>> je ffff82d0801034d4
>>
>> to:
>>
>> lock decl 0x1d8(%rax)
>> mov $0x0,%r14d
>> mov $0xffffffffffffffea,%rbx
>> jne ffff82d0801034d4
>>
>> avoiding the use of %r14d as an intermediate for calculating the conditional
>> jump, freeing it up for later use. As a result, the compiled code is a
> little
>> more efficient and smaller overall.
>
> I have to admit that I find this rather surprising, especially since
> (leaving the naming of the asm() operands aside) the preprocessed
> source should not change afaict. It being unexpected I think calls
> for better understanding what's going on here.
This is a result from the bool_t -> bool change and has nothing to do
with the use of ASM_FLAG_OUT(). There's also no difference of
produced code in a non-debug build.
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
2017-02-15 14:55 ` Jan Beulich
2017-02-15 15:13 ` Jan Beulich
@ 2017-02-15 16:00 ` Andrew Cooper
2017-02-15 16:08 ` Jan Beulich
1 sibling, 1 reply; 8+ messages in thread
From: Andrew Cooper @ 2017-02-15 16:00 UTC (permalink / raw)
To: Jan Beulich; +Cc: Xen-devel
On 15/02/17 14:55, Jan Beulich wrote:
>>>> On 15.02.17 at 15:22, <andrew.cooper3@citrix.com> wrote:
>> bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
>> result in forward declarations of the bitops.h contents. Move ASM_FLAG_OUT()
>> to a new asm/compiler.h to compensate.
>>
>> While making changes, switch bool_t to bool and use named asm parameters.
>>
>> No (intended) functional change. Without GCC flags, the disassembly is
>> identical before and after this patch. With GCC flags however, this patch
>> seems to cause GCC to make instruction scheduling decisions.
>>
>> The first place with significant changes in the disassembly is the the middle
>> of do_domctl(), which switches from:
>>
>> lock decl 0x1d8(%rax)
>> sete %r14b
>> mov $0xffffffffffffffea,%rbx
>> test %r14b,%r14b
>> je ffff82d0801034d4
>>
>> to:
>>
>> lock decl 0x1d8(%rax)
>> mov $0x0,%r14d
>> mov $0xffffffffffffffea,%rbx
>> jne ffff82d0801034d4
>>
>> avoiding the use of %r14d as an intermediate for calculating the conditional
>> jump, freeing it up for later use. As a result, the compiled code is a little
>> more efficient and smaller overall.
> I have to admit that I find this rather surprising, especially since
> (leaving the naming of the asm() operands aside) the preprocessed
> source should not change afaict. It being unexpected I think calls
> for better understanding what's going on here.
I presume you are happy now given your other discovery?
>
>> --- a/xen/include/asm-x86/config.h
>> +++ b/xen/include/asm-x86/config.h
>> @@ -107,6 +107,7 @@ extern unsigned char boot_edid_info[128];
>> #define asmlinkage
>>
>> #include <xen/const.h>
>> +#include <asm/compiler.h>
> Why here? You need it in bitops.h and maybe (it's not really used
> there) asm_defns.h only for now.
I am looking to avoid duplicating the definition, and this seemed like
an appropriate place to live.
Would you instead prefer me to duplicate it in bitops?
~Andrew
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
2017-02-15 16:00 ` Andrew Cooper
@ 2017-02-15 16:08 ` Jan Beulich
0 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2017-02-15 16:08 UTC (permalink / raw)
To: Andrew Cooper; +Cc: Xen-devel
>>> On 15.02.17 at 17:00, <andrew.cooper3@citrix.com> wrote:
> On 15/02/17 14:55, Jan Beulich wrote:
>>>>> On 15.02.17 at 15:22, <andrew.cooper3@citrix.com> wrote:
>>> bitops.h cannot include asm_defns.h, because the static inlines in
> cpumasks.h
>>> result in forward declarations of the bitops.h contents. Move
> ASM_FLAG_OUT()
>>> to a new asm/compiler.h to compensate.
>>>
>>> While making changes, switch bool_t to bool and use named asm parameters.
>>>
>>> No (intended) functional change. Without GCC flags, the disassembly is
>>> identical before and after this patch. With GCC flags however, this patch
>>> seems to cause GCC to make instruction scheduling decisions.
>>>
>>> The first place with significant changes in the disassembly is the the
> middle
>>> of do_domctl(), which switches from:
>>>
>>> lock decl 0x1d8(%rax)
>>> sete %r14b
>>> mov $0xffffffffffffffea,%rbx
>>> test %r14b,%r14b
>>> je ffff82d0801034d4
>>>
>>> to:
>>>
>>> lock decl 0x1d8(%rax)
>>> mov $0x0,%r14d
>>> mov $0xffffffffffffffea,%rbx
>>> jne ffff82d0801034d4
>>>
>>> avoiding the use of %r14d as an intermediate for calculating the conditional
>>> jump, freeing it up for later use. As a result, the compiled code is a
> little
>>> more efficient and smaller overall.
>> I have to admit that I find this rather surprising, especially since
>> (leaving the naming of the asm() operands aside) the preprocessed
>> source should not change afaict. It being unexpected I think calls
>> for better understanding what's going on here.
>
> I presume you are happy now given your other discovery?
Yes, albeit the commit message may want refining a little.
>>> --- a/xen/include/asm-x86/config.h
>>> +++ b/xen/include/asm-x86/config.h
>>> @@ -107,6 +107,7 @@ extern unsigned char boot_edid_info[128];
>>> #define asmlinkage
>>>
>>> #include <xen/const.h>
>>> +#include <asm/compiler.h>
>> Why here? You need it in bitops.h and maybe (it's not really used
>> there) asm_defns.h only for now.
>
> I am looking to avoid duplicating the definition, and this seemed like
> an appropriate place to live.
>
> Would you instead prefer me to duplicate it in bitops?
Duplicate what? The macro? No. The new header is fine, just that
I'd rather not see it included from config.h. And btw., other than
when Sergey wanted to put in a non-x86 header, instead of
introducing asm/compiler.h here, I think I would then agree to
simply put it in xen/compiler.h and be done once and for all.
Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
2017-02-15 14:22 [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs Andrew Cooper
2017-02-15 14:25 ` Andrew Cooper
2017-02-15 14:55 ` Jan Beulich
@ 2017-02-15 16:53 ` Andrew Cooper
2017-02-15 16:59 ` Jan Beulich
2 siblings, 1 reply; 8+ messages in thread
From: Andrew Cooper @ 2017-02-15 16:53 UTC (permalink / raw)
To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich
bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
result in forward declarations of the bitops.h contents. Move ASM_FLAG_OUT()
to a new asm/compiler.h to compensate.
While making changes, switch bool_t to bool and use named asm parameters.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
v2:
* Move ASM_FLAG_OUT() to xen/compiler.h rather than introducing
asm-x86/compiler.h
* Drop a lot of the commit message.
---
xen/include/asm-x86/asm_defns.h | 6 ---
xen/include/asm-x86/atomic.h | 64 ++++++++++++-------------------
xen/include/asm-x86/bitops.h | 85 ++++++++++++++---------------------------
xen/include/xen/compiler.h | 6 +++
4 files changed, 58 insertions(+), 103 deletions(-)
diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 220ae2e..f1c6fa1 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -413,10 +413,4 @@ static always_inline void stac(void)
#define REX64_PREFIX "rex64/"
#endif
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define ASM_FLAG_OUT(yes, no) yes
-#else
-# define ASM_FLAG_OUT(yes, no) no
-#endif
-
#endif /* __X86_ASM_DEFNS_H__ */
diff --git a/xen/include/asm-x86/atomic.h b/xen/include/asm-x86/atomic.h
index ef7e70b..d2a311c 100644
--- a/xen/include/asm-x86/atomic.h
+++ b/xen/include/asm-x86/atomic.h
@@ -133,17 +133,13 @@ static inline int atomic_sub_return(int i, atomic_t *v)
static inline int atomic_sub_and_test(int i, atomic_t *v)
{
- bool_t c;
-
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; subl %2,%0"
- : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
- : "ir" (i) : "memory" );
-#else
- asm volatile ( "lock; subl %2,%0; setz %1"
- : "+m" (*(volatile int *)&v->counter), "=qm" (c)
- : "ir" (i) : "memory" );
-#endif
+ bool c;
+
+ asm volatile ( "lock; subl %[i], %[counter]\n\t"
+ ASM_FLAG_OUT(, "setz %[zf]\n\t")
+ : [counter] "+m" (*(volatile int *)&v->counter),
+ [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
+ : [i] "ir" (i) : "memory" );
return c;
}
@@ -163,17 +159,13 @@ static inline int atomic_inc_return(atomic_t *v)
static inline int atomic_inc_and_test(atomic_t *v)
{
- bool_t c;
+ bool c;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; incl %0"
- : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
- :: "memory" );
-#else
- asm volatile ( "lock; incl %0; setz %1"
- : "+m" (*(volatile int *)&v->counter), "=qm" (c)
+ asm volatile ( "lock; incl %[counter]\n\t"
+ ASM_FLAG_OUT(, "setz %[zf]\n\t")
+ : [counter] "+m" (*(volatile int *)&v->counter),
+ [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
:: "memory" );
-#endif
return c;
}
@@ -193,34 +185,26 @@ static inline int atomic_dec_return(atomic_t *v)
static inline int atomic_dec_and_test(atomic_t *v)
{
- bool_t c;
+ bool c;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; decl %0"
- : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
+ asm volatile ( "lock; decl %[counter]\n\t"
+ ASM_FLAG_OUT(, "setz %[zf]\n\t")
+ : [counter] "+m" (*(volatile int *)&v->counter),
+ [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
:: "memory" );
-#else
- asm volatile ( "lock; decl %0; setz %1"
- : "+m" (*(volatile int *)&v->counter), "=qm" (c)
- :: "memory" );
-#endif
return c;
}
static inline int atomic_add_negative(int i, atomic_t *v)
{
- bool_t c;
-
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; addl %2,%0"
- : "+m" (*(volatile int *)&v->counter), "=@ccs" (c)
- : "ir" (i) : "memory" );
-#else
- asm volatile ( "lock; addl %2,%0; sets %1"
- : "+m" (*(volatile int *)&v->counter), "=qm" (c)
- : "ir" (i) : "memory" );
-#endif
+ bool c;
+
+ asm volatile ( "lock; addl %[i], %[counter]\n\t"
+ ASM_FLAG_OUT(, "sets %[sf]\n\t")
+ : [counter] "+m" (*(volatile int *)&v->counter),
+ [sf] ASM_FLAG_OUT("=@ccs", "=qm") (c)
+ : [i] "ir" (i) : "memory" );
return c;
}
diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h
index 0f18645..440abb7 100644
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -144,13 +144,10 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; btsl %2,%1"
- : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "lock; btsl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "lock; btsl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -172,15 +169,10 @@ static inline int __test_and_set_bit(int nr, void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "btsl %2,%1"
- : "=@ccc" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "btsl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "btsl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -201,13 +193,10 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; btrl %2,%1"
- : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "lock; btrl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "lock; btrl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -229,15 +218,10 @@ static inline int __test_and_clear_bit(int nr, void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "btrl %2,%1"
- : "=@ccc" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "btrl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "btrl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -251,15 +235,10 @@ static inline int __test_and_change_bit(int nr, void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "btcl %2,%1"
- : "=@ccc" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "btcl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (*(int *)addr)
- : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "btcl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -280,13 +259,10 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "lock; btcl %2,%1"
- : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
- asm volatile ( "lock; btcl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "lock; btcl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+ [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
return oldbit;
}
@@ -305,15 +281,10 @@ static inline int variable_test_bit(int nr, const volatile void *addr)
{
int oldbit;
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
- asm volatile ( "btl %2,%1"
- : "=@ccc" (oldbit)
- : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
-#else
- asm volatile ( "btl %2,%1\n\tsbbl %0,%0"
- : "=r" (oldbit)
- : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
-#endif
+ asm volatile ( "btl %[nr], %[addr]\n\t"
+ ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+ : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit)
+ : [addr] "m" (CONST_ADDR), [nr] "Ir" (nr) : "memory" );
return oldbit;
}
diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
index e800709..16aeeea 100644
--- a/xen/include/xen/compiler.h
+++ b/xen/include/xen/compiler.h
@@ -94,4 +94,10 @@
__asm__ ("" : "=r"(__ptr) : "0"(ptr)); \
(typeof(ptr)) (__ptr + (off)); })
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define ASM_FLAG_OUT(yes, no) yes
+#else
+# define ASM_FLAG_OUT(yes, no) no
+#endif
+
#endif /* __LINUX_COMPILER_H */
--
2.1.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
2017-02-15 16:53 ` Andrew Cooper
@ 2017-02-15 16:59 ` Jan Beulich
0 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2017-02-15 16:59 UTC (permalink / raw)
To: Andrew Cooper; +Cc: Xen-devel
>>> On 15.02.17 at 17:53, <andrew.cooper3@citrix.com> wrote:
> bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
> result in forward declarations of the bitops.h contents. Move
> ASM_FLAG_OUT()
> to a new asm/compiler.h to compensate.
>
> While making changes, switch bool_t to bool and use named asm parameters.
>
> No functional change.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2017-02-15 16:59 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-15 14:22 [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs Andrew Cooper
2017-02-15 14:25 ` Andrew Cooper
2017-02-15 14:55 ` Jan Beulich
2017-02-15 15:13 ` Jan Beulich
2017-02-15 16:00 ` Andrew Cooper
2017-02-15 16:08 ` Jan Beulich
2017-02-15 16:53 ` Andrew Cooper
2017-02-15 16:59 ` Jan Beulich
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.