* [PATCH v2] x86: Introduce ASM flags to bitops
@ 2015-07-27 17:48 Uros Bizjak
  2015-07-27 19:04 ` ASM flags in general H. Peter Anvin
  0 siblings, 1 reply; 14+ messages in thread
From: Uros Bizjak @ 2015-07-27 17:48 UTC (permalink / raw)
  To: linux-kernel; +Cc: x86, Uros Bizjak, Ingo Molnar

From: Uros Bizjak <ubizjak@gmail.com>

This patch introduces GCC ASM flags to bitops.

The new functionality depends on the __GCC_ASM_FLAG_OUTPUTS__
preprocessor symbol, which is defined automatically by GCC version 6
or later when the ASM flags feature is supported.
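
For reference, the feature is used through output constraints of the
form "=@cc<cond>", where <cond> is a condition-code suffix such as c,
z or b.  A minimal sketch (illustration only, not part of this patch;
the variable names are made up):

	int bit;

	/* "=@ccc" makes the carry flag itself the output of the asm */
	asm("bt %2,%1" : "=@ccc" (bit) : "m" (word), "Ir" (nr));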

The improvement can be illustrated with the following code snippet.
Instead of:

   136d7:	48 0f a3 3d 00 00 00 	bt     %rdi,0x0(%rip)
   136de:	00
   136df:	19 ff                	sbb    %edi,%edi
   136e1:	85 ff                	test   %edi,%edi
   136e3:	0f 95 c0             	setne  %al

the following code is generated:

   13767:	48 0f a3 3d 00 00 00 	bt     %rdi,0x0(%rip)
   1376e:	00
   1376f:	0f 92 c0             	setb   %al

A similar improvement can be seen in the following code:

    7a6c:	48 0f a3 11          	bt     %rdx,(%rcx)
    7a70:	19 d2                	sbb    %edx,%edx
    7a72:	85 d2                	test   %edx,%edx
    7a74:	74 eb                	je     7a61

which becomes:

    7a8c:	48 0f a3 11          	bt     %rdx,(%rcx)
    7a90:	73 ef                	jae    7a81

v2:
- Conditionally define CC_HAVE_ASM_FLAG_OUTPUTS in compiler-gcc.h
and use it instead of __GCC_ASM_FLAG_OUTPUTS__

Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
 arch/x86/include/asm/bitops.h      | 26 ++++++++++++++++++++++++--
 arch/x86/include/asm/percpu.h      | 18 +++++++++++++++++-
 arch/x86/include/asm/signal.h      |  6 ++++++
 arch/x86/include/asm/sync_bitops.h | 18 ++++++++++++++++++
 include/linux/compiler-gcc.h       |  9 +++++++++
 5 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index cfe3b95..3b66d88 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -232,10 +232,16 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm("bts %2,%1"
+	    : "=@ccc" (oldbit), ADDR
+	    : "Ir" (nr));
+#else
 	asm("bts %2,%1\n\t"
 	    "sbb %0,%0"
 	    : "=r" (oldbit), ADDR
 	    : "Ir" (nr));
+#endif
 	return oldbit;
 }
 
@@ -272,10 +278,16 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm volatile("btr %2,%1"
+		     : "=@ccc" (oldbit), ADDR
+		     : "Ir" (nr));
+#else
 	asm volatile("btr %2,%1\n\t"
 		     "sbb %0,%0"
 		     : "=r" (oldbit), ADDR
 		     : "Ir" (nr));
+#endif
 	return oldbit;
 }
 
@@ -284,11 +296,16 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm volatile("btc %2,%1"
+		     : "=@ccc" (oldbit), ADDR
+		     : "Ir" (nr) : "memory");
+#else
 	asm volatile("btc %2,%1\n\t"
 		     "sbb %0,%0"
 		     : "=r" (oldbit), ADDR
 		     : "Ir" (nr) : "memory");
-
+#endif
 	return oldbit;
 }
 
@@ -315,11 +332,16 @@ static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
 {
 	int oldbit;
 
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm volatile("bt %2,%1"
+		     : "=@ccc" (oldbit)
+		     : "m" (*(unsigned long *)addr), "Ir" (nr));
+#else
 	asm volatile("bt %2,%1\n\t"
 		     "sbb %0,%0"
 		     : "=r" (oldbit)
 		     : "m" (*(unsigned long *)addr), "Ir" (nr));
-
+#endif
 	return oldbit;
 }
 
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e0ba66c..4879f36 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -508,6 +508,16 @@ do {									\
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+#define x86_test_and_clear_bit_percpu(bit, var)				\
+({									\
+	int old__;							\
+	asm volatile("btr %2,"__percpu_arg(1)				\
+		     : "=@ccc" (old__), "+m" (var)			\
+		     : "dIr" (bit));					\
+	old__;								\
+})
+#else
 #define x86_test_and_clear_bit_percpu(bit, var)				\
 ({									\
 	int old__;							\
@@ -516,6 +526,7 @@ do {									\
 		     : "dIr" (bit));					\
 	old__;								\
 })
+#endif
 
 static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
                         const unsigned long __percpu *addr)
@@ -534,11 +545,16 @@ static inline int x86_this_cpu_variable_test_bit(int nr,
 {
 	int oldbit;
 
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm volatile("bt "__percpu_arg(2)",%1"
+			: "=@ccc" (oldbit)
+			: "m" (*(unsigned long *)addr), "Ir" (nr));
+#else
 	asm volatile("bt "__percpu_arg(2)",%1\n\t"
 			"sbb %0,%0"
 			: "=r" (oldbit)
 			: "m" (*(unsigned long *)addr), "Ir" (nr));
-
+#endif
 	return oldbit;
 }
 
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 31eab86..1792de8 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -82,8 +82,14 @@ static inline int __const_sigismember(sigset_t *set, int _sig)
 static inline int __gen_sigismember(sigset_t *set, int _sig)
 {
 	int ret;
+
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm("btl %2,%1"
+	    : "=@ccc"(ret) : "m"(*set), "Ir"(_sig-1));
+#else
 	asm("btl %2,%1\n\tsbbl %0,%0"
 	    : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+#endif
 	return ret;
 }
 
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index f28a24b..0af45ee 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -81,9 +81,15 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm volatile("lock; bts %2,%1"
+		     : "=@ccc" (oldbit), "+m" (ADDR)
+		     : "Ir" (nr) : "memory");
+#else
 	asm volatile("lock; bts %2,%1\n\tsbbl %0,%0"
 		     : "=r" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
+#endif
 	return oldbit;
 }
 
@@ -99,9 +105,15 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm volatile("lock; btr %2,%1"
+		     : "=@ccc" (oldbit), "+m" (ADDR)
+		     : "Ir" (nr) : "memory");
+#else
 	asm volatile("lock; btr %2,%1\n\tsbbl %0,%0"
 		     : "=r" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
+#endif
 	return oldbit;
 }
 
@@ -117,9 +129,15 @@ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr
 {
 	int oldbit;
 
+#ifdef CC_HAVE_ASM_FLAG_OUTPUTS
+	asm volatile("lock; btc %2,%1"
+		     : "=@ccc" (oldbit), "+m" (ADDR)
+		     : "Ir" (nr) : "memory");
+#else
 	asm volatile("lock; btc %2,%1\n\tsbbl %0,%0"
 		     : "=r" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
+#endif
 	return oldbit;
 }
 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index dfaa7b3..0feb83e 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -122,6 +122,15 @@
 #define __maybe_unused		__attribute__((unused))
 #define __always_unused		__attribute__((unused))
 
+/*
+ * The compiler defines __GCC_ASM_FLAG_OUTPUTS__ when a special form
+ * of output operand exists by which conditions in the flags register
+ * may be outputs of the asm.
+ */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define CC_HAVE_ASM_FLAG_OUTPUTS
+#endif
+
 /* gcc version specific checks */
 
 #if GCC_VERSION < 30200
-- 
2.4.3



* ASM flags in general
  2015-07-27 17:48 [PATCH v2] x86: Introduce ASM flags to bitops Uros Bizjak
@ 2015-07-27 19:04 ` H. Peter Anvin
  2015-07-27 20:01   ` Uros Bizjak
  2015-07-27 20:38   ` Andy Lutomirski
  0 siblings, 2 replies; 14+ messages in thread
From: H. Peter Anvin @ 2015-07-27 19:04 UTC (permalink / raw)
  To: Uros Bizjak, linux-kernel; +Cc: x86, Uros Bizjak, Ingo Molnar

On 07/27/2015 10:48 AM, Uros Bizjak wrote:
> From: Uros Bizjak <ubizjak@gmail.com>
> 
> This patch introduces GCC ASM flags to bitops.
> 
> The new functionality depends on the __GCC_ASM_FLAG_OUTPUTS__
> preprocessor symbol, which is defined automatically by GCC version 6
> or later when the ASM flags feature is supported.
> 
> The improvement can be illustrated with the following code snippet.
> Instead of:
> 

This might be a good time to bring up in public a proposal I recently
made in private:

In the next version (6.0) of gcc, there will be support for emitting
flags as outputs from inline assembly.  We obviously want to use this,
but I think it would be better to not have to have two copies for each
bit of assembly, for old and new gcc.

I have been thinking about it, and have checked how gcc actually
handles "bool" as an output.  It turns out that using "bool" as an asm
output is perfectly well defined on x86: it is a byte output which is
required to be 0 or 1.  This is consistent with how the "set"
instruction operates.
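
Concretely, a setcc-based bitop with a bool output would look
something like this (a sketch, not taken from the patch; the variable
names are made up):

	bool oldbit;

	/* setc writes a 0/1 byte, which matches gcc's notion of bool */
	asm("bts %2,%1\n\t"
	    "setc %0"
	    : "=qm" (oldbit), "+m" (*addr)
	    : "Ir" (nr));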

As such, I would like to propose the following:

1. We change the actual type of the output variable for these asm
statements to bool.
2. Define the following macros:

#ifdef __GCC_ASM_FLAG_OUTPUTS__

#define CC_SET(c)
#define CC_OUT(c) "=@cc" #c

#else

#define CC_SET(c) "\n\tset" #c " %[cc_" #c "]"
#define CC_OUT(c) [cc_ ## c] "=qm"

#endif


A sample use would be something like this:

unsigned long x, y, z;
bool carry;

asm("add %3,%0"
    CC_SET(c)
    : "=r" (z), CC_OUT(c) (carry)
    : "0" (x), "rm" (y));



I wonder whether using "set" would be a performance regression over "sbb"
for the existing bitops; if not, they would slot quite nicely into this
scheme.

I have been working in the background on a patchset for this, but as my
life is incredibly crazy at the moment, progress has been slow.  However,
this is the way I'd like to see a patchset, with appropriate tests for
regression in between:

1. A patchset that uses "bool" for this type of boolean output.
2. A patchset to change "sbb" to "set" where appropriate.
3. Introducing the above macros.
4. Using the macros where it makes sense.

Linus also commented, quite correctly:

"That said, I think we should look at actually changing some of the
functions we use.

For example, I think cmpxchg really should use ZF once we have flag
outputs rather than comparing old and new.  That changes the whole way
we use it.

There may be other places where we might want to make those kind of
bigger changes."

Those are the kind of places where having per-site conditional code
makes sense.
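
To make the cmpxchg example concrete, a ZF-based variant could look
something like this (a sketch only, not a proposal for the final
interface; the function name is made up):

static inline bool cmpxchg_flag(unsigned long *ptr,
				unsigned long old, unsigned long new)
{
	bool success;

	/* ZF is set iff the comparison succeeded and new was stored */
	asm volatile("lock; cmpxchg %3,%1"
		     : "=@ccz" (success), "+m" (*ptr), "+a" (old)
		     : "r" (new)
		     : "memory");
	return success;
}

The caller then tests the bool directly instead of comparing the
returned old value with the expected one.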


	-hpa





* Re: ASM flags in general
  2015-07-27 19:04 ` ASM flags in general H. Peter Anvin
@ 2015-07-27 20:01   ` Uros Bizjak
  2015-07-27 21:14     ` H. Peter Anvin
  2015-07-27 20:38   ` Andy Lutomirski
  1 sibling, 1 reply; 14+ messages in thread
From: Uros Bizjak @ 2015-07-27 20:01 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Uros Bizjak, linux-kernel, x86, Ingo Molnar

On Mon, Jul 27, 2015 at 9:04 PM, H. Peter Anvin <hpa@zytor.com> wrote:

> I wonder whether using "set" would be a performance regression over "sbb"
> for the existing bitops; if not, they would slot quite nicely into this
> scheme.

As far as I have looked into the compiled code, the following sequence
was produced when the value was used directly as a bool:

00000000000136d0 <__static_cpu_has_safe>:
   136d0:    55                       push   %rbp
   136d1:    0f b7 ff                 movzwl %di,%edi
   136d4:    48 89 e5                 mov    %rsp,%rbp
   136d7:    48 0f a3 3d 00 00 00     bt     %rdi,0x0(%rip)        # 136df <__static_cpu_has_safe+0xf>
   136de:    00
            136db: R_X86_64_PC32    boot_cpu_data+0x10
   136df:    19 ff                    sbb    %edi,%edi
   136e1:    85 ff                    test   %edi,%edi
   136e3:    0f 95 c0                 setne  %al
   136e6:    5d                       pop    %rbp
   136e7:    c3                       retq

vs. new sequence:

0000000000013760 <__static_cpu_has_safe>:
   13760:    55                       push   %rbp
   13761:    0f b7 ff                 movzwl %di,%edi
   13764:    48 89 e5                 mov    %rsp,%rbp
   13767:    48 0f a3 3d 00 00 00     bt     %rdi,0x0(%rip)        # 1376f <__static_cpu_has_safe+0xf>
   1376e:    00
            1376b: R_X86_64_PC32    boot_cpu_data+0x10
   1376f:    0f 92 c0                 setb   %al
   13772:    5d                       pop    %rbp
   13773:    c3                       retq

Actually, I had to search for the code above; in the vast majority of
cases, BT is simply followed by a conditional jump.

Uros.


* Re: ASM flags in general
  2015-07-27 19:04 ` ASM flags in general H. Peter Anvin
  2015-07-27 20:01   ` Uros Bizjak
@ 2015-07-27 20:38   ` Andy Lutomirski
  2015-07-27 21:04     ` H. Peter Anvin
  1 sibling, 1 reply; 14+ messages in thread
From: Andy Lutomirski @ 2015-07-27 20:38 UTC (permalink / raw)
  To: H. Peter Anvin, Uros Bizjak, linux-kernel; +Cc: x86, Uros Bizjak, Ingo Molnar

On 07/27/2015 12:04 PM, H. Peter Anvin wrote:
> On 07/27/2015 10:48 AM, Uros Bizjak wrote:
>> From: Uros Bizjak <ubizjak@gmail.com>
>>
>> This patch introduces GCC ASM flags to bitops.
>>
>> The new functionality depends on the __GCC_ASM_FLAG_OUTPUTS__
>> preprocessor symbol, which is defined automatically by GCC version 6
>> or later when the ASM flags feature is supported.
>>
>> The improvement can be illustrated with the following code snippet.
>> Instead of:
>>
>
> This might be a good time to bring up in public a proposal I recently
> made in private:
>
> In the next version (6.0) of gcc, there will be support for emitting
> flags as outputs from inline assembly.  We obviously want to use this,
> but I think it would be better to not have to have two copies for each
> bit of assembly, for old and new gcc.
>
> I have been thinking about it, and have checked how gcc actually
> handles "bool" as an output.  It turns out that using "bool" as an asm
> output is perfectly well defined on x86: it is a byte output which is
> required to be 0 or 1.  This is consistent with how the "set"
> instruction operates.
>
> As such, I would like to propose the following:
>
> 1. We change the actual type of the output variable for these asm
> statements to bool.
> 2. Define the following macros:
>
> #ifdef __GCC_ASM_FLAG_OUTPUTS__
>
> #define CC_SET(c)
> #define CC_OUT(c) "=@cc" #c
>
> #else
>
> #define CC_SET(c) "\n\tset" #c " %[cc_" #c "]"
> #define CC_OUT(c) [cc_ ## c] "=qm"
>
> #endif
>
>
> A sample use would be something like this:
>
> unsigned long x, y, z;
> bool carry;
>
> asm("add %3,%0"
>      CC_SET(c)
>      : "=r" (z), CC_OUT(c) (carry)
>      : "0" (x), "rm" (y));
>
>
>
> I wonder whether using "set" would be a performance regression over "sbb"
> for the existing bitops; if not, they would slot quite nicely into this
> scheme.
>
> I have been working in the background on a patchset for this, but as my
> life is incredibly crazy at the moment, progress has been slow.  However,
> this is the way I'd like to see a patchset, with appropriate tests for
> regression in between:
>
> 1. A patchset that uses "bool" for this type of boolean output.
> 2. A patchset to change "sbb" to "set" where appropriate.
> 3. Introducing the above macros.
> 4. Using the macros where it makes sense.
>
> Linus also commented, quite correctly:
>
> "That said, I think we should look at actually changing some of the
> functions we use.
>
> For example, I think cmpxchg really should use ZF once we have flag
> outputs rather than comparing old and new.  That changes the whole way
> we use it.
>
> There may be other places where we might want to make those kind of
> bigger changes."
>

As long as we're thinking about this stuff, there are a bunch of places
where we use exception fixups and do awful things involving translating
them to error codes.  Ideally they'd use asm goto instead, but last time
I checked, GCC was quite picky and didn't like output constraints and 
asm goto at the same time.  Maybe GCC could fix this at some point, but 
using condition code outputs might be reasonable, too.

Doing this would make put_user_ex and similar completely unnecessary, I 
think.

--Andy




* Re: ASM flags in general
  2015-07-27 20:38   ` Andy Lutomirski
@ 2015-07-27 21:04     ` H. Peter Anvin
  2015-07-27 22:43       ` Andy Lutomirski
  0 siblings, 1 reply; 14+ messages in thread
From: H. Peter Anvin @ 2015-07-27 21:04 UTC (permalink / raw)
  To: Andy Lutomirski, Uros Bizjak, linux-kernel; +Cc: x86, Uros Bizjak, Ingo Molnar

On 07/27/2015 01:38 PM, Andy Lutomirski wrote:
> 
> As long as we're thinking about this stuff, there are a bunch of places
> where we use exception fixups and do awful things involving translating
> them to error codes.  Ideally they'd use asm goto instead, but last time
> I checked, GCC was quite picky and didn't like output constraints and
> asm goto at the same time.  Maybe GCC could fix this at some point, but
> using condition code outputs might be reasonable, too.
> 
> Doing this would make put_user_ex and similar completely unnecessary, I
> think.
> 

No, I think this is wrong.  Exceptions and flags are almost each other's
opposites.  Since C doesn't have native exception handling (except
setjmp/longjmp), we pretty much hack it.

asm goto() would indeed be the better way to do this, but again, would
in most cases require asm goto to support outputs.

However, we really don't want get_user_ex and put_user_ex to go away.
They produce extremely efficient code -- just a bunch of mov operations
-- for the common path, and that's the way we like it.

That being said, there probably are a couple of patterns where we could
do, say "stc" in the exception path, and emit CF as an output:

bool err;
int errno;

asm volatile("xor %1,%1\n"	/* Clears CF */
	     "1: something %3,%0\n"/* Leaves CF unchanged, or clears */
	     "2:\n"
             ".section .fixup.\"ax\"\n"
	     "3: mov %4,%1\n"
	     "   stc\n"
	     "   jmp 2b"
	     _ASM_EXTABLE(1b,3b)
            : "=X" (output), "=r" (errno), "=@ccc" (err)
	    : "Y" (input), "i" (-EIO));

This would make "err" immediately testable.  However, it also might make
gcc generate extra code to save and restore err, since it wouldn't
understand the invariant that err = !!errno.

	-hpa



* Re: ASM flags in general
  2015-07-27 20:01   ` Uros Bizjak
@ 2015-07-27 21:14     ` H. Peter Anvin
  0 siblings, 0 replies; 14+ messages in thread
From: H. Peter Anvin @ 2015-07-27 21:14 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Uros Bizjak, linux-kernel, x86, Ingo Molnar

On 07/27/2015 01:01 PM, Uros Bizjak wrote:
> On Mon, Jul 27, 2015 at 9:04 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> 
>> I wonder whether using "set" would be a performance regression over "sbb"
>> for the existing bitops; if not, they would slot quite nicely into this
>> scheme.
> 
> As far as I have looked into the compiled code, following sequence was
> produced when the value was directly used as bool
> 
  [...]
> 
> vs. new sequence:
> 

You misunderstood me: I was referring to *old* versions of gcc (≤ 5); in
other words: can we use the macros I proposed instead of #ifdef?  For
gcc 6+ we obviously want to use the flags output.

	-hpa



* Re: ASM flags in general
  2015-07-27 21:04     ` H. Peter Anvin
@ 2015-07-27 22:43       ` Andy Lutomirski
       [not found]         ` <BFA94A6B-8E68-4990-8737-F1F470D47F6A@zytor.com>
  0 siblings, 1 reply; 14+ messages in thread
From: Andy Lutomirski @ 2015-07-27 22:43 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Uros Bizjak, linux-kernel, X86 ML, Uros Bizjak,
	Ingo Molnar

On Mon, Jul 27, 2015 at 2:04 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 07/27/2015 01:38 PM, Andy Lutomirski wrote:
>>
>> As long as we're thinking about this stuff, there are a bunch of places
>> where we use exception fixups and do awful things involving translating
>> them to error codes.  Ideally they'd use asm goto instead, but last time
>> I checked, GCC was quite picky and didn't like output constraints and
>> asm goto at the same time.  Maybe GCC could fix this at some point, but
>> using condition code outputs might be reasonable, too.
>>
>> Doing this would make put_user_ex and similar completely unnecessary, I
>> think.
>>
>
> No, I think this is wrong.  Exceptions and flags are almost each other's
> opposites.  Since C doesn't have native exception handling (except
> setjmp/longjmp), we pretty much hack it.
>
> asm goto() would indeed be the better way to do this, but again, would
> in most cases require asm goto to support outputs.
>
> However, we really don't want get_user_ex and put_user_ex to go away.
> They produce extremely efficient code -- just a bunch of mov operations
> -- for the common path, and that's the way we like it.

Wouldn't asm goto be just as good (assuming it supported the right
constraints)?  In principle, this:

if (put_user(...))
  goto error;
if (put_user(...))
  goto error;

should optimize to:

mov [...]
_ASM_EXTABLE(...)
mov [...]
_ASM_EXTABLE(...)

...

extable_landing_pad:
   jmp error

IOW, I think that GCC's optimizer should be good enough to keep the
error paths out of line and maybe even to coalesce them.

>
> That being said, there probably are a couple of patterns where we could
> do, say "stc" in the exception path, and emit CF as an output:
>
> bool err;
> int errno;
>
> asm volatile("xor %1,%1\n"      /* Clears CF */
>              "1: something %3,%0\n"/* Leaves CF unchanged, or clears */
>              "2:\n"
>              ".section .fixup.\"ax\"\n"
>              "3: mov %4,%1\n"
>              "   stc\n"
>              "   jmp 2b"
>              _ASM_EXTABLE(1b,3b)
>             : "=X" (output), "=r" (errno), "=@ccc" (err)
>             : "Y" (input), "i" (-EIO));
>
> This would make "err" immediately testable.  However, it also might make
> gcc generate extra code to save and restore err, since it wouldn't
> understand the invariant that err = !!errno.

Yeah, this wouldn't be as good as asm goto.

--Andy


* Re: ASM flags in general
       [not found]         ` <BFA94A6B-8E68-4990-8737-F1F470D47F6A@zytor.com>
@ 2015-07-27 23:36           ` Andy Lutomirski
       [not found]             ` <E2EE4512-30B2-4E73-B91A-DC1A9AA0AF6C@zytor.com>
  0 siblings, 1 reply; 14+ messages in thread
From: Andy Lutomirski @ 2015-07-27 23:36 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Uros Bizjak, linux-kernel, X86 ML, Uros Bizjak,
	Ingo Molnar

On Mon, Jul 27, 2015 at 4:22 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> For that to work, gcc would have to know about the extable.

It could, I think:

asm goto (
    "1: mov ...\n\t"
    _ASM_EXTABLE(1b, %l2)  /* or whatever index it is */
    : ... : ... : ... : efault);

return 0;

efault:
     return -EFAULT;

I think that wrmsr_safe could get this treatment with current GCC.
put_user plausibly could, too, if we were willing to mark it volatile
and accept that we're lying a little bit about the lack of an output
constraint.  get_user would need GCC to understand output constraints
for real.
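
Fleshed out, the wrmsr_safe case might look something like this (a
sketch only; it assumes _ASM_EXTABLE accepts an asm goto label as the
fixup target, and the function name is made up):

static inline int wrmsr_safe_goto(u32 msr, u32 low, u32 high)
{
	asm_volatile_goto("1: wrmsr\n\t"
			  _ASM_EXTABLE(1b, %l[fault])
			  : /* asm goto cannot have outputs */
			  : "c" (msr), "a" (low), "d" (high)
			  : "memory"
			  : fault);
	return 0;
fault:
	return -EIO;
}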

--Andy


* Re: ASM flags in general
       [not found]             ` <E2EE4512-30B2-4E73-B91A-DC1A9AA0AF6C@zytor.com>
@ 2015-07-27 23:49               ` Andy Lutomirski
  2015-07-27 23:56                 ` H. Peter Anvin
  0 siblings, 1 reply; 14+ messages in thread
From: Andy Lutomirski @ 2015-07-27 23:49 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Uros Bizjak, linux-kernel, X86 ML, Uros Bizjak,
	Ingo Molnar

On Mon, Jul 27, 2015 at 4:46 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Sure, but that is different than getting rid of the _ex forms.
>

If we did that and got rid of the _ex forms, though, then the code
that matters (the no-fault case) would just be a bunch of movs, right?
 That's basically the same as the current _ex code.

--Andy

> On July 27, 2015 4:36:26 PM PDT, Andy Lutomirski <luto@amacapital.net>
> wrote:
>>
>> On Mon, Jul 27, 2015 at 4:22 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>>>
>>>  For that to work, gcc would have to know about the extable.
>>
>>
>> It could, I think:
>>
>> asm goto (
>>     "1: mov ...\n\t"
>>     _ASM_EXTABLE(1b, %l2)  /* or whatever index it is */
>>     : ... : ... : ... : efault);
>>
>> return 0;
>>
>> efault:
>>      return -EFAULT;
>>
>> I think that wrmsr_safe could get this treatment with current GCC.
>> put_user plausibly could, too, if we were willing to mark it volatile
>> and accept that we're lying a little bit about the lack of an output
>> constraint.  get_user would need GCC to understand output constraints
>> for real.
>>
>> --Andy
>
>
> --
> Sent from my Android device with K-9 Mail. Please excuse my brevity.



-- 
Andy Lutomirski
AMA Capital Management, LLC


* Re: ASM flags in general
  2015-07-27 23:49               ` Andy Lutomirski
@ 2015-07-27 23:56                 ` H. Peter Anvin
  2015-07-27 23:58                   ` Andy Lutomirski
  0 siblings, 1 reply; 14+ messages in thread
From: H. Peter Anvin @ 2015-07-27 23:56 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Uros Bizjak, linux-kernel, X86 ML, Uros Bizjak,
	Ingo Molnar

Sure... but now you have to wrap things in stac/clac.  I'm not sure I see the point since the code is already pretty much optimal.

On July 27, 2015 4:49:46 PM PDT, Andy Lutomirski <luto@amacapital.net> wrote:
>On Mon, Jul 27, 2015 at 4:46 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>> Sure, but that is different than getting rid of the _ex forms.
>>
>
>If we did that and got rid of the _ex forms, though, then the code
>that matters (the no-fault case) would just be a bunch of movs, right?
> That's basically the same as the current _ex code.
>
>--Andy
>
>> On July 27, 2015 4:36:26 PM PDT, Andy Lutomirski
><luto@amacapital.net>
>> wrote:
>>>
>>> On Mon, Jul 27, 2015 at 4:22 PM, H. Peter Anvin <hpa@zytor.com>
>wrote:
>>>>
>>>>  For that to work, gcc would have to know about the extable.
>>>
>>>
>>> It could, I think:
>>>
>>> asm goto (
>>>     "1: mov ...\n\t"
>>>     _ASM_EXTABLE(1b, %l2)  /* or whatever index it is */
>>>     : ... : ... : ... : efault);
>>>
>>> return 0;
>>>
>>> efault:
>>>      return -EFAULT;
>>>
>>> I think that wrmsr_safe could get this treatment with current GCC.
>>> put_user plausibly could, too, if we were willing to mark it
>volatile
>>> and accept that we're lying a little bit about the lack of an output
>>> constraint.  get_user would need GCC to understand output
>constraints
>>> for real.
>>>
>>> --Andy
>>
>>
>> --
>> Sent from my Android device with K-9 Mail. Please excuse my brevity.

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


* Re: ASM flags in general
  2015-07-27 23:56                 ` H. Peter Anvin
@ 2015-07-27 23:58                   ` Andy Lutomirski
  2015-07-28  0:03                     ` H. Peter Anvin
  2015-07-28  0:35                     ` H. Peter Anvin
  0 siblings, 2 replies; 14+ messages in thread
From: Andy Lutomirski @ 2015-07-27 23:58 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Uros Bizjak, linux-kernel, X86 ML, Uros Bizjak,
	Ingo Molnar

On Mon, Jul 27, 2015 at 4:56 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> Sure... but now you have to wrap things in stac/clac.  I'm not sure I see the point since the code is already pretty much optimal.

Ick.  I forgot about that.

It would benefit all the users who aren't using the _ex functions,
though.  And, if there are enough of those, it just might be worth
trying to postprocess object files to collapse adjacent stac and clac
instances, or just to teach the alternatives code to nop them out if
they're redundant.

--Andy


* Re: ASM flags in general
  2015-07-27 23:58                   ` Andy Lutomirski
@ 2015-07-28  0:03                     ` H. Peter Anvin
  2015-07-28  0:35                     ` H. Peter Anvin
  1 sibling, 0 replies; 14+ messages in thread
From: H. Peter Anvin @ 2015-07-28  0:03 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Uros Bizjak, linux-kernel, X86 ML, Uros Bizjak,
	Ingo Molnar

There are certainly applications.  That just isn't one of them.

On July 27, 2015 4:58:29 PM PDT, Andy Lutomirski <luto@amacapital.net> wrote:
>On Mon, Jul 27, 2015 at 4:56 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>> Sure... but now you have to wrap things in stac/clac.  I'm not sure I
>see the point since the code is already pretty much optimal.
>
>Ick.  I forgot about that.
>
>It would benefit all the users who aren't using the _ex functions,
>though.  And, if there are enough of those, it just might be worth
>trying to postprocess object files to collapse adjacent stac and clac
>instances, or just to teach the alternatives code to nop them out if
>they're redundant.
>
>--Andy

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


* Re: ASM flags in general
  2015-07-27 23:58                   ` Andy Lutomirski
  2015-07-28  0:03                     ` H. Peter Anvin
@ 2015-07-28  0:35                     ` H. Peter Anvin
  2015-07-28  0:41                       ` Andy Lutomirski
  1 sibling, 1 reply; 14+ messages in thread
From: H. Peter Anvin @ 2015-07-28  0:35 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Uros Bizjak, linux-kernel, X86 ML, Uros Bizjak,
	Ingo Molnar

However... perhaps we can do the flags stuff first?  There are certainly a ton of changes we ought to do.

On July 27, 2015 4:58:29 PM PDT, Andy Lutomirski <luto@amacapital.net> wrote:
>On Mon, Jul 27, 2015 at 4:56 PM, H. Peter Anvin <hpa@zytor.com> wrote:
>> Sure... but now you have to wrap things in stac/clac.  I'm not sure I
>see the point since the code is already pretty much optimal.
>
>Ick.  I forgot about that.
>
>It would benefit all the users who aren't using the _ex functions,
>though.  And, if there are enough of those, it just might be worth
>trying to postprocess object files to collapse adjacent stac and clac
>instances, or just to teach the alternatives code to nop them out if
>they're redundant.
>
>--Andy

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


* Re: ASM flags in general
  2015-07-28  0:35                     ` H. Peter Anvin
@ 2015-07-28  0:41                       ` Andy Lutomirski
  0 siblings, 0 replies; 14+ messages in thread
From: Andy Lutomirski @ 2015-07-28  0:41 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andy Lutomirski, Uros Bizjak, linux-kernel, X86 ML, Uros Bizjak,
	Ingo Molnar

On Mon, Jul 27, 2015 at 5:35 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> However... perhaps we can do the flags stuff first?  There are certainly a ton of changes we ought to do.
>

Fine with me.  Flags are probably more generally useful, especially
because of the silly lack of output constraints on asm goto.

--Andy

