All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
@ 2017-02-15 14:22 Andrew Cooper
  2017-02-15 14:25 ` Andrew Cooper
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Andrew Cooper @ 2017-02-15 14:22 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich

bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
result in forward declarations of the bitops.h contents.  Move ASM_FLAG_OUT()
to a new asm/compiler.h to compensate.

While making changes, switch bool_t to bool and use named asm parameters.

No (intended) functional change.  Without GCC flags, the disassembly is
identical before and after this patch.  With GCC flags however, this patch
seems to cause GCC to make instruction scheduling decisions.

The first place with significant changes in the disassembly is the the middle
of do_domctl(), which switches from:

    lock decl 0x1d8(%rax)
    sete   %r14b
    mov    $0xffffffffffffffea,%rbx
    test   %r14b,%r14b
    je     ffff82d0801034d4

to:

    lock decl 0x1d8(%rax)
    mov    $0x0,%r14d
    mov    $0xffffffffffffffea,%rbx
    jne    ffff82d0801034d4

avoiding the use of %r14d as an intermediate for calculating the conditional
jump, freeing it up for later use.  As a result, the compiled code is a little
more efficient and smaller overall.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

I have confirmed using:

diff --git a/xen/include/asm-x86/atomic.h b/xen/include/asm-x86/atomic.h
index ef7e70b..de79443 100644
--- a/xen/include/asm-x86/atomic.h
+++ b/xen/include/asm-x86/atomic.h
@@ -197,7 +197,7 @@ static inline int atomic_dec_and_test(atomic_t *v)

 #ifdef __GCC_ASM_FLAG_OUTPUTS__
     asm volatile ( "lock; decl %0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
+                   : "+m" (*(volatile int *)&v->counter), "=@ccnz" (c)
                    :: "memory" );
 #else
     asm volatile ( "lock; decl %0; setz %1"

that compiler explicitly chose to insert the sete instruction despite the
"=@ccz" condition.  The resulting hypervisor does work perfectly well, so I
guess this will be down to some internal details of GCC's code generation.
---
 xen/include/asm-x86/asm_defns.h |  6 ---
 xen/include/asm-x86/atomic.h    | 64 ++++++++++++-------------------
 xen/include/asm-x86/bitops.h    | 85 ++++++++++++++---------------------------
 xen/include/asm-x86/compiler.h  | 20 ++++++++++
 xen/include/asm-x86/config.h    |  1 +
 5 files changed, 73 insertions(+), 103 deletions(-)
 create mode 100644 xen/include/asm-x86/compiler.h

diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 220ae2e..f1c6fa1 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -413,10 +413,4 @@ static always_inline void stac(void)
 #define REX64_PREFIX "rex64/"
 #endif
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define ASM_FLAG_OUT(yes, no) yes
-#else
-# define ASM_FLAG_OUT(yes, no) no
-#endif
-
 #endif /* __X86_ASM_DEFNS_H__ */
diff --git a/xen/include/asm-x86/atomic.h b/xen/include/asm-x86/atomic.h
index ef7e70b..d2a311c 100644
--- a/xen/include/asm-x86/atomic.h
+++ b/xen/include/asm-x86/atomic.h
@@ -133,17 +133,13 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
-    bool_t c;
-
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; subl %2,%0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
-                   : "ir" (i) : "memory" );
-#else
-    asm volatile ( "lock; subl %2,%0; setz %1"
-                   : "+m" (*(volatile int *)&v->counter), "=qm" (c)
-                   : "ir" (i) : "memory" );
-#endif
+    bool c;
+
+    asm volatile ( "lock; subl %[i], %[counter]\n\t"
+                   ASM_FLAG_OUT(, "setz %[zf]\n\t")
+                   : [counter] "+m" (*(volatile int *)&v->counter),
+                     [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
+                   : [i] "ir" (i) : "memory" );
 
     return c;
 }
@@ -163,17 +159,13 @@ static inline int atomic_inc_return(atomic_t *v)
 
 static inline int atomic_inc_and_test(atomic_t *v)
 {
-    bool_t c;
+    bool c;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; incl %0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
-                   :: "memory" );
-#else
-    asm volatile ( "lock; incl %0; setz %1"
-                   : "+m" (*(volatile int *)&v->counter), "=qm" (c)
+    asm volatile ( "lock; incl %[counter]\n\t"
+                   ASM_FLAG_OUT(, "setz %[zf]\n\t")
+                   : [counter] "+m" (*(volatile int *)&v->counter),
+                     [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
                    :: "memory" );
-#endif
 
     return c;
 }
@@ -193,34 +185,26 @@ static inline int atomic_dec_return(atomic_t *v)
 
 static inline int atomic_dec_and_test(atomic_t *v)
 {
-    bool_t c;
+    bool c;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; decl %0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
+    asm volatile ( "lock; decl %[counter]\n\t"
+                   ASM_FLAG_OUT(, "setz %[zf]\n\t")
+                   : [counter] "+m" (*(volatile int *)&v->counter),
+                     [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
                    :: "memory" );
-#else
-    asm volatile ( "lock; decl %0; setz %1"
-                   : "+m" (*(volatile int *)&v->counter), "=qm" (c)
-                   :: "memory" );
-#endif
 
     return c;
 }
 
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
-    bool_t c;
-
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; addl %2,%0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccs" (c)
-                   : "ir" (i) : "memory" );
-#else
-    asm volatile ( "lock; addl %2,%0; sets %1"
-                   : "+m" (*(volatile int *)&v->counter), "=qm" (c)
-                   : "ir" (i) : "memory" );
-#endif
+    bool c;
+
+    asm volatile ( "lock; addl %[i], %[counter]\n\t"
+                   ASM_FLAG_OUT(, "sets %[sf]\n\t")
+                   : [counter] "+m" (*(volatile int *)&v->counter),
+                     [sf] ASM_FLAG_OUT("=@ccs", "=qm") (c)
+                   : [i] "ir" (i) : "memory" );
 
     return c;
 }
diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h
index 0f18645..440abb7 100644
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -144,13 +144,10 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; btsl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "lock; btsl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "lock; btsl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -172,15 +169,10 @@ static inline int __test_and_set_bit(int nr, void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "btsl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "btsl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "btsl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -201,13 +193,10 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; btrl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "lock; btrl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "lock; btrl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -229,15 +218,10 @@ static inline int __test_and_clear_bit(int nr, void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "btrl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "btrl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "btrl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -251,15 +235,10 @@ static inline int __test_and_change_bit(int nr, void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "btcl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "btcl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "btcl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -280,13 +259,10 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; btcl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "lock; btcl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "lock; btcl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -305,15 +281,10 @@ static inline int variable_test_bit(int nr, const volatile void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "btl %2,%1"
-                   : "=@ccc" (oldbit)
-                   : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "btl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit)
-                   : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "btl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit)
+                   : [addr] "m" (CONST_ADDR), [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
diff --git a/xen/include/asm-x86/compiler.h b/xen/include/asm-x86/compiler.h
new file mode 100644
index 0000000..1fd6a8d
--- /dev/null
+++ b/xen/include/asm-x86/compiler.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_X86_COMPILER_H__
+#define __ASM_X86_COMPILER_H__
+
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define ASM_FLAG_OUT(yes, no) yes
+#else
+# define ASM_FLAG_OUT(yes, no) no
+#endif
+
+#endif /* __ASM_X86_COMPILER_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 6fd84e7..b48a0de 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -107,6 +107,7 @@ extern unsigned char boot_edid_info[128];
 #define asmlinkage
 
 #include <xen/const.h>
+#include <asm/compiler.h>
 
 #define PML4_ENTRY_BITS  39
 #define PML4_ENTRY_BYTES (_AC(1,UL) << PML4_ENTRY_BITS)
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
  2017-02-15 14:22 [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs Andrew Cooper
@ 2017-02-15 14:25 ` Andrew Cooper
  2017-02-15 14:55 ` Jan Beulich
  2017-02-15 16:53 ` Andrew Cooper
  2 siblings, 0 replies; 8+ messages in thread
From: Andrew Cooper @ 2017-02-15 14:25 UTC (permalink / raw)
  To: Xen-devel; +Cc: Jan Beulich

On 15/02/17 14:22, Andrew Cooper wrote:
> bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
> result in forward declarations of the bitops.h contents.  Move ASM_FLAG_OUT()
> to a new asm/compiler.h to compensate.
>
> While making changes, switch bool_t to bool and use named asm parameters.
>
> No (intended) functional change.  Without GCC flags, the disassembly is
> identical before and after this patch.  With GCC flags however, this patch
> seems to cause GCC to make instruction scheduling decisions.

This is supposed to read "make better instruction...".

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
  2017-02-15 14:22 [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs Andrew Cooper
  2017-02-15 14:25 ` Andrew Cooper
@ 2017-02-15 14:55 ` Jan Beulich
  2017-02-15 15:13   ` Jan Beulich
  2017-02-15 16:00   ` Andrew Cooper
  2017-02-15 16:53 ` Andrew Cooper
  2 siblings, 2 replies; 8+ messages in thread
From: Jan Beulich @ 2017-02-15 14:55 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel

>>> On 15.02.17 at 15:22, <andrew.cooper3@citrix.com> wrote:
> bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
> result in forward declarations of the bitops.h contents.  Move ASM_FLAG_OUT()
> to a new asm/compiler.h to compensate.
> 
> While making changes, switch bool_t to bool and use named asm parameters.
> 
> No (intended) functional change.  Without GCC flags, the disassembly is
> identical before and after this patch.  With GCC flags however, this patch
> seems to cause GCC to make instruction scheduling decisions.
> 
> The first place with significant changes in the disassembly is the the middle
> of do_domctl(), which switches from:
> 
>     lock decl 0x1d8(%rax)
>     sete   %r14b
>     mov    $0xffffffffffffffea,%rbx
>     test   %r14b,%r14b
>     je     ffff82d0801034d4
> 
> to:
> 
>     lock decl 0x1d8(%rax)
>     mov    $0x0,%r14d
>     mov    $0xffffffffffffffea,%rbx
>     jne    ffff82d0801034d4
> 
> avoiding the use of %r14d as an intermediate for calculating the conditional
> jump, freeing it up for later use.  As a result, the compiled code is a little
> more efficient and smaller overall.

I have to admit that I find this rather surprising, especially since
(leaving the naming of the asm() operands aside) the preprocessed
source should not change afaict. It being unexpected I think calls
for better understanding what's going on here.

> --- a/xen/include/asm-x86/config.h
> +++ b/xen/include/asm-x86/config.h
> @@ -107,6 +107,7 @@ extern unsigned char boot_edid_info[128];
>  #define asmlinkage
>  
>  #include <xen/const.h>
> +#include <asm/compiler.h>

Why here? You need it in bitops.h and maybe (it's not really used
there) asm_defns.h only for now.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
  2017-02-15 14:55 ` Jan Beulich
@ 2017-02-15 15:13   ` Jan Beulich
  2017-02-15 16:00   ` Andrew Cooper
  1 sibling, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2017-02-15 15:13 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel

>>> On 15.02.17 at 15:55, <JBeulich@suse.com> wrote:
>>>> On 15.02.17 at 15:22, <andrew.cooper3@citrix.com> wrote:
>> bitops.h cannot include asm_defns.h, because the static inlines in 
> cpumasks.h
>> result in forward declarations of the bitops.h contents.  Move 
> ASM_FLAG_OUT()
>> to a new asm/compiler.h to compensate.
>> 
>> While making changes, switch bool_t to bool and use named asm parameters.
>> 
>> No (intended) functional change.  Without GCC flags, the disassembly is
>> identical before and after this patch.  With GCC flags however, this patch
>> seems to cause GCC to make instruction scheduling decisions.
>> 
>> The first place with significant changes in the disassembly is the the 
> middle
>> of do_domctl(), which switches from:
>> 
>>     lock decl 0x1d8(%rax)
>>     sete   %r14b
>>     mov    $0xffffffffffffffea,%rbx
>>     test   %r14b,%r14b
>>     je     ffff82d0801034d4
>> 
>> to:
>> 
>>     lock decl 0x1d8(%rax)
>>     mov    $0x0,%r14d
>>     mov    $0xffffffffffffffea,%rbx
>>     jne    ffff82d0801034d4
>> 
>> avoiding the use of %r14d as an intermediate for calculating the conditional
>> jump, freeing it up for later use.  As a result, the compiled code is a 
> little
>> more efficient and smaller overall.
> 
> I have to admit that I find this rather surprising, especially since
> (leaving the naming of the asm() operands aside) the preprocessed
> source should not change afaict. It being unexpected I think calls
> for better understanding what's going on here.

This is a result from the bool_t -> bool change and has nothing to do
with the use of ASM_FLAG_OUT(). There's also no difference of
produced code in a non-debug build.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
  2017-02-15 14:55 ` Jan Beulich
  2017-02-15 15:13   ` Jan Beulich
@ 2017-02-15 16:00   ` Andrew Cooper
  2017-02-15 16:08     ` Jan Beulich
  1 sibling, 1 reply; 8+ messages in thread
From: Andrew Cooper @ 2017-02-15 16:00 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel

On 15/02/17 14:55, Jan Beulich wrote:
>>>> On 15.02.17 at 15:22, <andrew.cooper3@citrix.com> wrote:
>> bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
>> result in forward declarations of the bitops.h contents.  Move ASM_FLAG_OUT()
>> to a new asm/compiler.h to compensate.
>>
>> While making changes, switch bool_t to bool and use named asm parameters.
>>
>> No (intended) functional change.  Without GCC flags, the disassembly is
>> identical before and after this patch.  With GCC flags however, this patch
>> seems to cause GCC to make instruction scheduling decisions.
>>
>> The first place with significant changes in the disassembly is the the middle
>> of do_domctl(), which switches from:
>>
>>     lock decl 0x1d8(%rax)
>>     sete   %r14b
>>     mov    $0xffffffffffffffea,%rbx
>>     test   %r14b,%r14b
>>     je     ffff82d0801034d4
>>
>> to:
>>
>>     lock decl 0x1d8(%rax)
>>     mov    $0x0,%r14d
>>     mov    $0xffffffffffffffea,%rbx
>>     jne    ffff82d0801034d4
>>
>> avoiding the use of %r14d as an intermediate for calculating the conditional
>> jump, freeing it up for later use.  As a result, the compiled code is a little
>> more efficient and smaller overall.
> I have to admit that I find this rather surprising, especially since
> (leaving the naming of the asm() operands aside) the preprocessed
> source should not change afaict. It being unexpected I think calls
> for better understanding what's going on here.

I presume you are happy now given your other discovery?

>
>> --- a/xen/include/asm-x86/config.h
>> +++ b/xen/include/asm-x86/config.h
>> @@ -107,6 +107,7 @@ extern unsigned char boot_edid_info[128];
>>  #define asmlinkage
>>  
>>  #include <xen/const.h>
>> +#include <asm/compiler.h>
> Why here? You need it in bitops.h and maybe (it's not really used
> there) asm_defns.h only for now.

I am looking to avoid duplicating the definition, and this seemed like
an appropriate place to live.

Would you instead prefer me to duplicate it in bitops?

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
  2017-02-15 16:00   ` Andrew Cooper
@ 2017-02-15 16:08     ` Jan Beulich
  0 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2017-02-15 16:08 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel

>>> On 15.02.17 at 17:00, <andrew.cooper3@citrix.com> wrote:
> On 15/02/17 14:55, Jan Beulich wrote:
>>>>> On 15.02.17 at 15:22, <andrew.cooper3@citrix.com> wrote:
>>> bitops.h cannot include asm_defns.h, because the static inlines in 
> cpumasks.h
>>> result in forward declarations of the bitops.h contents.  Move 
> ASM_FLAG_OUT()
>>> to a new asm/compiler.h to compensate.
>>>
>>> While making changes, switch bool_t to bool and use named asm parameters.
>>>
>>> No (intended) functional change.  Without GCC flags, the disassembly is
>>> identical before and after this patch.  With GCC flags however, this patch
>>> seems to cause GCC to make instruction scheduling decisions.
>>>
>>> The first place with significant changes in the disassembly is the the 
> middle
>>> of do_domctl(), which switches from:
>>>
>>>     lock decl 0x1d8(%rax)
>>>     sete   %r14b
>>>     mov    $0xffffffffffffffea,%rbx
>>>     test   %r14b,%r14b
>>>     je     ffff82d0801034d4
>>>
>>> to:
>>>
>>>     lock decl 0x1d8(%rax)
>>>     mov    $0x0,%r14d
>>>     mov    $0xffffffffffffffea,%rbx
>>>     jne    ffff82d0801034d4
>>>
>>> avoiding the use of %r14d as an intermediate for calculating the conditional
>>> jump, freeing it up for later use.  As a result, the compiled code is a 
> little
>>> more efficient and smaller overall.
>> I have to admit that I find this rather surprising, especially since
>> (leaving the naming of the asm() operands aside) the preprocessed
>> source should not change afaict. It being unexpected I think calls
>> for better understanding what's going on here.
> 
> I presume you are happy now given your other discovery?

Yes, albeit the commit message may want refining a little.

>>> --- a/xen/include/asm-x86/config.h
>>> +++ b/xen/include/asm-x86/config.h
>>> @@ -107,6 +107,7 @@ extern unsigned char boot_edid_info[128];
>>>  #define asmlinkage
>>>  
>>>  #include <xen/const.h>
>>> +#include <asm/compiler.h>
>> Why here? You need it in bitops.h and maybe (it's not really used
>> there) asm_defns.h only for now.
> 
> I am looking to avoid duplicating the definition, and this seemed like
> an appropriate place to live.
> 
> Would you instead prefer me to duplicate it in bitops?

Duplicate what? The macro? No. The new header is fine, just that
I'd rather not see it included from config.h. And btw., other than
when Sergey wanted to put in a non-x86 header, instead of
introducing asm/compiler.h here, I think I would then agree to
simply put it in xen/compiler.h and be done once and for all.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
  2017-02-15 14:22 [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs Andrew Cooper
  2017-02-15 14:25 ` Andrew Cooper
  2017-02-15 14:55 ` Jan Beulich
@ 2017-02-15 16:53 ` Andrew Cooper
  2017-02-15 16:59   ` Jan Beulich
  2 siblings, 1 reply; 8+ messages in thread
From: Andrew Cooper @ 2017-02-15 16:53 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich

bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
result in forward declarations of the bitops.h contents.  Move ASM_FLAG_OUT()
to a new asm/compiler.h to compensate.

While making changes, switch bool_t to bool and use named asm parameters.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

v2:
 * Move ASM_FLAG_OUT() to xen/compiler.h rather than introducing
   asm-x86/compiler.h
 * Drop a lot of the commit message.
---
 xen/include/asm-x86/asm_defns.h |  6 ---
 xen/include/asm-x86/atomic.h    | 64 ++++++++++++-------------------
 xen/include/asm-x86/bitops.h    | 85 ++++++++++++++---------------------------
 xen/include/xen/compiler.h      |  6 +++
 4 files changed, 58 insertions(+), 103 deletions(-)

diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 220ae2e..f1c6fa1 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -413,10 +413,4 @@ static always_inline void stac(void)
 #define REX64_PREFIX "rex64/"
 #endif
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define ASM_FLAG_OUT(yes, no) yes
-#else
-# define ASM_FLAG_OUT(yes, no) no
-#endif
-
 #endif /* __X86_ASM_DEFNS_H__ */
diff --git a/xen/include/asm-x86/atomic.h b/xen/include/asm-x86/atomic.h
index ef7e70b..d2a311c 100644
--- a/xen/include/asm-x86/atomic.h
+++ b/xen/include/asm-x86/atomic.h
@@ -133,17 +133,13 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
-    bool_t c;
-
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; subl %2,%0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
-                   : "ir" (i) : "memory" );
-#else
-    asm volatile ( "lock; subl %2,%0; setz %1"
-                   : "+m" (*(volatile int *)&v->counter), "=qm" (c)
-                   : "ir" (i) : "memory" );
-#endif
+    bool c;
+
+    asm volatile ( "lock; subl %[i], %[counter]\n\t"
+                   ASM_FLAG_OUT(, "setz %[zf]\n\t")
+                   : [counter] "+m" (*(volatile int *)&v->counter),
+                     [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
+                   : [i] "ir" (i) : "memory" );
 
     return c;
 }
@@ -163,17 +159,13 @@ static inline int atomic_inc_return(atomic_t *v)
 
 static inline int atomic_inc_and_test(atomic_t *v)
 {
-    bool_t c;
+    bool c;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; incl %0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
-                   :: "memory" );
-#else
-    asm volatile ( "lock; incl %0; setz %1"
-                   : "+m" (*(volatile int *)&v->counter), "=qm" (c)
+    asm volatile ( "lock; incl %[counter]\n\t"
+                   ASM_FLAG_OUT(, "setz %[zf]\n\t")
+                   : [counter] "+m" (*(volatile int *)&v->counter),
+                     [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
                    :: "memory" );
-#endif
 
     return c;
 }
@@ -193,34 +185,26 @@ static inline int atomic_dec_return(atomic_t *v)
 
 static inline int atomic_dec_and_test(atomic_t *v)
 {
-    bool_t c;
+    bool c;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; decl %0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccz" (c)
+    asm volatile ( "lock; decl %[counter]\n\t"
+                   ASM_FLAG_OUT(, "setz %[zf]\n\t")
+                   : [counter] "+m" (*(volatile int *)&v->counter),
+                     [zf] ASM_FLAG_OUT("=@ccz", "=qm") (c)
                    :: "memory" );
-#else
-    asm volatile ( "lock; decl %0; setz %1"
-                   : "+m" (*(volatile int *)&v->counter), "=qm" (c)
-                   :: "memory" );
-#endif
 
     return c;
 }
 
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
-    bool_t c;
-
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; addl %2,%0"
-                   : "+m" (*(volatile int *)&v->counter), "=@ccs" (c)
-                   : "ir" (i) : "memory" );
-#else
-    asm volatile ( "lock; addl %2,%0; sets %1"
-                   : "+m" (*(volatile int *)&v->counter), "=qm" (c)
-                   : "ir" (i) : "memory" );
-#endif
+    bool c;
+
+    asm volatile ( "lock; addl %[i], %[counter]\n\t"
+                   ASM_FLAG_OUT(, "sets %[sf]\n\t")
+                   : [counter] "+m" (*(volatile int *)&v->counter),
+                     [sf] ASM_FLAG_OUT("=@ccs", "=qm") (c)
+                   : [i] "ir" (i) : "memory" );
 
     return c;
 }
diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h
index 0f18645..440abb7 100644
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -144,13 +144,10 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; btsl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "lock; btsl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "lock; btsl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -172,15 +169,10 @@ static inline int __test_and_set_bit(int nr, void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "btsl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "btsl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "btsl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -201,13 +193,10 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; btrl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "lock; btrl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "lock; btrl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -229,15 +218,10 @@ static inline int __test_and_clear_bit(int nr, void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "btrl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "btrl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "btrl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -251,15 +235,10 @@ static inline int __test_and_change_bit(int nr, void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "btcl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "btcl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (*(int *)addr)
-                   : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "btcl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -280,13 +259,10 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "lock; btcl %2,%1"
-                   : "=@ccc" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "lock; btcl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "lock; btcl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
+                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
@@ -305,15 +281,10 @@ static inline int variable_test_bit(int nr, const volatile void *addr)
 {
     int oldbit;
 
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-    asm volatile ( "btl %2,%1"
-                   : "=@ccc" (oldbit)
-                   : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
-#else
-    asm volatile ( "btl %2,%1\n\tsbbl %0,%0"
-                   : "=r" (oldbit)
-                   : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
-#endif
+    asm volatile ( "btl %[nr], %[addr]\n\t"
+                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
+                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit)
+                   : [addr] "m" (CONST_ADDR), [nr] "Ir" (nr) : "memory" );
 
     return oldbit;
 }
diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
index e800709..16aeeea 100644
--- a/xen/include/xen/compiler.h
+++ b/xen/include/xen/compiler.h
@@ -94,4 +94,10 @@
     __asm__ ("" : "=r"(__ptr) : "0"(ptr));      \
     (typeof(ptr)) (__ptr + (off)); })
 
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define ASM_FLAG_OUT(yes, no) yes
+#else
+# define ASM_FLAG_OUT(yes, no) no
+#endif
+
 #endif /* __LINUX_COMPILER_H */
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs
  2017-02-15 16:53 ` Andrew Cooper
@ 2017-02-15 16:59   ` Jan Beulich
  0 siblings, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2017-02-15 16:59 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel

>>> On 15.02.17 at 17:53, <andrew.cooper3@citrix.com> wrote:
> bitops.h cannot include asm_defns.h, because the static inlines in cpumasks.h
> result in forward declarations of the bitops.h contents.  Move 
> ASM_FLAG_OUT()
> to a new asm/compiler.h to compensate.
> 
> While making changes, switch bool_t to bool and use named asm parameters.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-02-15 16:59 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-15 14:22 [PATCH] x86/asm: Use ASM_FLAG_OUT() to simplify atomic and bitop stubs Andrew Cooper
2017-02-15 14:25 ` Andrew Cooper
2017-02-15 14:55 ` Jan Beulich
2017-02-15 15:13   ` Jan Beulich
2017-02-15 16:00   ` Andrew Cooper
2017-02-15 16:08     ` Jan Beulich
2017-02-15 16:53 ` Andrew Cooper
2017-02-15 16:59   ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.