linux-kernel.vger.kernel.org archive mirror
* [PATCH v2] x86/asm: fix assembly constraints in bitops
@ 2019-04-02 11:28 Alexander Potapenko
  2019-04-02 11:33 ` Alexander Potapenko
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Alexander Potapenko @ 2019-04-02 11:28 UTC (permalink / raw)
  To: paulmck, hpa, peterz; +Cc: linux-kernel, dvyukov, jyknight, x86, mingo

1. Use memory clobber in bitops that touch arbitrary memory

Certain bit operations that read/write bits take a base pointer and an
arbitrarily large offset to address the bit relative to that base.
Inline assembly constraints aren't expressive enough to tell the
compiler that the assembly directive is going to touch a specific memory
location of unknown size, therefore we have to use the "memory" clobber
to indicate that the assembly is going to access memory locations other
than those listed in the inputs/outputs.
To indicate that BTR/BTS instructions don't necessarily touch the first
sizeof(long) bytes of the argument, we also move the address to assembly
inputs.

This particular change leads to a size increase in 124 kernel functions in
a defconfig build. For some of them the difference is only in NOP operations;
others end up re-reading values from memory and may potentially slow down
execution. But without these clobbers the compiler is free to cache
the contents of the bitmaps and use them as if they weren't changed by
the inline assembly.
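
For illustration only (not part of the change itself), here is a minimal
sketch of the kind of pattern the clobber protects against; the function
names are invented and the asm is reduced to its 64-bit register form:

#include <stdbool.h>

/* Old-style constraint: only the first long at addr is declared as
 * read/written, and there is no "memory" clobber. */
static inline void narrow_clear_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("btrq %1,%0"
		     : "+m" (*(volatile long *) addr)
		     : "r" (nr));
}

bool clears_high_bit(unsigned long *bitmap)
{
	unsigned long old = bitmap[1];	/* may be kept in a register */
	narrow_clear_bit(66, bitmap);	/* really clears bit 2 of bitmap[1] */
	/* Nothing tells the compiler bitmap[1] changed, so it may reuse
	 * the cached value and fold this comparison to true. */
	return old == bitmap[1];
}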

2. Use byte-sized arguments for operations touching single bytes.

Passing a long value to ANDB/ORB/XORB instructions makes the compiler
treat sizeof(long) bytes as being clobbered, which isn't the case. This
may theoretically lead to worse code in the case of heavy optimization.
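
For illustration only (again not part of the change), here are the two
constraint shapes side by side, with invented helper names; the only
difference is the lvalue type the compiler is told the asm reads and writes:

static inline void and_byte_wide(void *p, unsigned char mask)
{
	/* Declares all sizeof(long) bytes at p as read/written, even
	 * though ANDB only ever touches one byte. */
	asm volatile("andb %1,%0"
		     : "+m" (*(volatile long *) p)
		     : "iq" (mask));
}

static inline void and_byte_narrow(void *p, unsigned char mask)
{
	/* Matches what ANDB actually touches: a single byte. */
	asm volatile("andb %1,%0"
		     : "+m" (*(volatile char *) p)
		     : "iq" (mask));
}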

Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: James Y Knight <jyknight@google.com>
---
 v2:
  -- renamed the patch
  -- addressed comment by Peter Zijlstra: don't use "+m" for functions
  returning void
  -- fixed input types for operations touching single bytes
---
 arch/x86/include/asm/bitops.h | 41 +++++++++++++++--------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index d153d570bb04..c0cb9c5d3476 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,16 +36,17 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
 
-#define ADDR				BITOP_ADDR(addr)
+#define ADDR				RLONG_ADDR(addr)
 
 /*
  * We do the locked ops that don't return the old value as
  * a mask operation on a byte.
  */
 #define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr)	WBYTE_ADDR((void *)(addr) + ((nr)>>3))
 #define CONST_MASK(nr)			(1 << ((nr) & 7))
 
 /**
@@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
 			: "memory");
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
-			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+	asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 /**
@@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
 			: "iq" ((u8)~CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
-			: BITOP_ADDR(addr)
-			: "Ir" (nr));
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
 
 static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+	asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
 	bool negative;
 	asm volatile(LOCK_PREFIX "andb %2,%1"
 		CC_SET(s)
-		: CC_OUT(s) (negative), ADDR
+		: CC_OUT(s) (negative), WBYTE_ADDR(addr)
 		: "ir" ((char) ~(1 << nr)) : "memory");
 	return negative;
 }
@@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
  * __clear_bit() is non-atomic and implies release semantics before the memory
  * operation. It can be used for an unlock if no other CPUs can concurrently
  * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
  */
 static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
-	barrier();
 	__clear_bit(nr, addr);
 }
 
@@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
  */
 static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+	asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 /**
@@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
 			: "iq" ((u8)CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
-			: BITOP_ADDR(addr)
-			: "Ir" (nr));
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
 
 	asm(__ASM_SIZE(bts) " %2,%1"
 	    CC_SET(c)
-	    : CC_OUT(c) (oldbit), ADDR
-	    : "Ir" (nr));
+	    : CC_OUT(c) (oldbit)
+	    : ADDR, "Ir" (nr) : "memory");
 	return oldbit;
 }
 
@@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
 
 	asm volatile(__ASM_SIZE(btr) " %2,%1"
 		     CC_SET(c)
-		     : CC_OUT(c) (oldbit), ADDR
-		     : "Ir" (nr));
+		     : CC_OUT(c) (oldbit)
+		     : ADDR, "Ir" (nr) : "memory");
 	return oldbit;
 }
 
@@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
 
 	asm volatile(__ASM_SIZE(btc) " %2,%1"
 		     CC_SET(c)
-		     : CC_OUT(c) (oldbit), ADDR
-		     : "Ir" (nr) : "memory");
+		     : CC_OUT(c) (oldbit)
+		     : ADDR, "Ir" (nr) : "memory");
 
 	return oldbit;
 }
@@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
 	asm volatile(__ASM_SIZE(bt) " %2,%1"
 		     CC_SET(c)
 		     : CC_OUT(c) (oldbit)
-		     : "m" (*(unsigned long *)addr), "Ir" (nr));
+		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
 
 	return oldbit;
 }
-- 
2.21.0.392.gf8f6787159e-goog



* Re: [PATCH v2] x86/asm: fix assembly constraints in bitops
  2019-04-02 11:28 [PATCH v2] x86/asm: fix assembly constraints in bitops Alexander Potapenko
@ 2019-04-02 11:33 ` Alexander Potapenko
  2019-04-02 11:45 ` David Laight
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Alexander Potapenko @ 2019-04-02 11:33 UTC (permalink / raw)
  To: Paul McKenney, H. Peter Anvin, Peter Zijlstra
  Cc: LKML, Dmitriy Vyukov, James Y Knight, x86, Ingo Molnar

On Tue, Apr 2, 2019 at 1:28 PM Alexander Potapenko <glider@google.com> wrote:
>
> 1. Use memory clobber in bitops that touch arbitrary memory
>
> Certain bit operations that read/write bits take a base pointer and an
> arbitrarily large offset to address the bit relative to that base.
> Inline assembly constraints aren't expressive enough to tell the
> compiler that the assembly directive is going to touch a specific memory
> location of unknown size, therefore we have to use the "memory" clobber
> to indicate that the assembly is going to access memory locations other
> than those listed in the inputs/outputs.
> To indicate that BTR/BTS instructions don't necessarily touch the first
> sizeof(long) bytes of the argument, we also move the address to assembly
> inputs.
>
> This particular change leads to a size increase in 124 kernel functions in
> a defconfig build. For some of them the difference is only in NOP operations;
> others end up re-reading values from memory and may potentially slow down
> execution. But without these clobbers the compiler is free to cache
> the contents of the bitmaps and use them as if they weren't changed by
> the inline assembly.
>
> 2. Use byte-sized arguments for operations touching single bytes.
>
> Passing a long value to ANDB/ORB/XORB instructions makes the compiler
> treat sizeof(long) bytes as being clobbered, which isn't the case. This
> may theoretically lead to worse code in the case of heavy optimization.
>
> Signed-off-by: Alexander Potapenko <glider@google.com>
> Cc: Dmitry Vyukov <dvyukov@google.com>
> Cc: Paul E. McKenney <paulmck@linux.ibm.com>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: James Y Knight <jyknight@google.com>
> ---
>  v2:
>   -- renamed the patch
>   -- addressed comment by Peter Zijlstra: don't use "+m" for functions
>   returning void
>   -- fixed input types for operations touching single bytes
> ---
>  arch/x86/include/asm/bitops.h | 41 +++++++++++++++--------------------
>  1 file changed, 18 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> index d153d570bb04..c0cb9c5d3476 100644
> --- a/arch/x86/include/asm/bitops.h
> +++ b/arch/x86/include/asm/bitops.h
> @@ -36,16 +36,17 @@
>   * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
>   */
>
> -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> +#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
> +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
>
> -#define ADDR                           BITOP_ADDR(addr)
> +#define ADDR                           RLONG_ADDR(addr)
>
>  /*
>   * We do the locked ops that don't return the old value as
>   * a mask operation on a byte.
>   */
>  #define IS_IMMEDIATE(nr)               (__builtin_constant_p(nr))
> -#define CONST_MASK_ADDR(nr, addr)      BITOP_ADDR((void *)(addr) + ((nr)>>3))
> +#define CONST_MASK_ADDR(nr, addr)      WBYTE_ADDR((void *)(addr) + ((nr)>>3))
>  #define CONST_MASK(nr)                 (1 << ((nr) & 7))
>
>  /**
> @@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
>                         : "memory");
>         } else {
>                 asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
> -                       : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
> +                       : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>         }
>  }
>
> @@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
>   */
>  static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
>  {
> -       asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
> +       asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  /**
> @@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
>                         : "iq" ((u8)~CONST_MASK(nr)));
>         } else {
>                 asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
> -                       : BITOP_ADDR(addr)
> -                       : "Ir" (nr));
> +                       : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>         }
>  }
>
> @@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
>
>  static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
>  {
> -       asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
> +       asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
> @@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
>         bool negative;
>         asm volatile(LOCK_PREFIX "andb %2,%1"
>                 CC_SET(s)
> -               : CC_OUT(s) (negative), ADDR
> +               : CC_OUT(s) (negative), WBYTE_ADDR(addr)
>                 : "ir" ((char) ~(1 << nr)) : "memory");
>         return negative;
>  }
> @@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
>   * __clear_bit() is non-atomic and implies release semantics before the memory
>   * operation. It can be used for an unlock if no other CPUs can concurrently
>   * modify other bits in the word.
> - *
> - * No memory barrier is required here, because x86 cannot reorder stores past
> - * older loads. Same principle as spin_unlock.
>   */
>  static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
>  {
> -       barrier();
I should have reflected this removal in the patch description.
>         __clear_bit(nr, addr);
>  }
>
> @@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
>   */
>  static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
>  {
> -       asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
> +       asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  /**
> @@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
>                         : "iq" ((u8)CONST_MASK(nr)));
>         } else {
>                 asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
> -                       : BITOP_ADDR(addr)
> -                       : "Ir" (nr));
> +                       : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>         }
>  }
>
> @@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
>
>         asm(__ASM_SIZE(bts) " %2,%1"
>             CC_SET(c)
> -           : CC_OUT(c) (oldbit), ADDR
> -           : "Ir" (nr));
> +           : CC_OUT(c) (oldbit)
> +           : ADDR, "Ir" (nr) : "memory");
>         return oldbit;
>  }
>
> @@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
>
>         asm volatile(__ASM_SIZE(btr) " %2,%1"
>                      CC_SET(c)
> -                    : CC_OUT(c) (oldbit), ADDR
> -                    : "Ir" (nr));
> +                    : CC_OUT(c) (oldbit)
> +                    : ADDR, "Ir" (nr) : "memory");
>         return oldbit;
>  }
>
> @@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
>
>         asm volatile(__ASM_SIZE(btc) " %2,%1"
>                      CC_SET(c)
> -                    : CC_OUT(c) (oldbit), ADDR
> -                    : "Ir" (nr) : "memory");
> +                    : CC_OUT(c) (oldbit)
> +                    : ADDR, "Ir" (nr) : "memory");
>
>         return oldbit;
>  }
> @@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
>         asm volatile(__ASM_SIZE(bt) " %2,%1"
>                      CC_SET(c)
>                      : CC_OUT(c) (oldbit)
> -                    : "m" (*(unsigned long *)addr), "Ir" (nr));
> +                    : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
>
>         return oldbit;
>  }
> --
> 2.21.0.392.gf8f6787159e-goog
>


-- 
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Managing Directors: Paul Manicle, Halimah DeLaine Prado
Court of Registration and Number: Hamburg, HRB 86891
Registered Office: Hamburg


* RE: [PATCH v2] x86/asm: fix assembly constraints in bitops
  2019-04-02 11:28 [PATCH v2] x86/asm: fix assembly constraints in bitops Alexander Potapenko
  2019-04-02 11:33 ` Alexander Potapenko
@ 2019-04-02 11:45 ` David Laight
  2019-04-02 12:35   ` Alexander Potapenko
  2019-04-05  9:39 ` Ingo Molnar
  2019-04-06  8:46 ` [tip:x86/urgent] x86/asm: Use stricter " tip-bot for Alexander Potapenko
  3 siblings, 1 reply; 10+ messages in thread
From: David Laight @ 2019-04-02 11:45 UTC (permalink / raw)
  To: 'Alexander Potapenko', paulmck, hpa, peterz
  Cc: linux-kernel, dvyukov, jyknight, x86, mingo

From: Alexander Potapenko
> Sent: 02 April 2019 12:28
> 
> 1. Use memory clobber in bitops that touch arbitrary memory
> 
> Certain bit operations that read/write bits take a base pointer and an
> arbitrarily large offset to address the bit relative to that base.

Although x86_64 can use a signed 64-bit bit number, arm and arm64 use
'int nr' throughout, as do the generic functions.
Maybe x86 ought to be consistent here.
I doubt negative bit numbers are expected to work?

Did you try telling gcc that a big buffer (250MB is the limit on 32-bit)
starting at the pointer might be changed?
That ought to be softer than a full 'memory' clobber as it should
only affect memory that could be accessed through the pointer.
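
Roughly, as a sketch (invented names; just one possible spelling of the
above, and Alexander's malloc() example further down uses the same idea):

struct large_bitmap {
	unsigned long w[(1UL << 28) / sizeof(unsigned long)];	/* ~256MB */
};

/* Tell the compiler a large region starting at x may be read/written,
 * instead of clobbering all of memory. */
#define BITOP_RANGE_ADDR(x) "+m" (*(volatile struct large_bitmap *) (x))

static inline void set_bit_range(long nr, volatile unsigned long *addr)
{
	asm volatile("btsq %1,%0" : BITOP_RANGE_ADDR(addr) : "r" (nr));
}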

....
> -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> +#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
> +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
> 
> -#define ADDR				BITOP_ADDR(addr)
> +#define ADDR				RLONG_ADDR(addr)

Is it worth just killing ADDR ?
(as a different patch)

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


* Re: [PATCH v2] x86/asm: fix assembly constraints in bitops
  2019-04-02 11:45 ` David Laight
@ 2019-04-02 12:35   ` Alexander Potapenko
  2019-04-02 12:37     ` Alexander Potapenko
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Potapenko @ 2019-04-02 12:35 UTC (permalink / raw)
  To: David Laight
  Cc: paulmck, hpa, peterz, linux-kernel, dvyukov, jyknight, x86, mingo

On Tue, Apr 2, 2019 at 1:44 PM David Laight <David.Laight@aculab.com> wrote:
>
> From: Alexander Potapenko
> > Sent: 02 April 2019 12:28
> >
> > 1. Use memory clobber in bitops that touch arbitrary memory
> >
> > Certain bit operations that read/write bits take a base pointer and an
> > arbitrarily large offset to address the bit relative to that base.
>
> Although x86_64 can use a signed 64bit bit number, looking at arm and arm64
> they use 'int nr' throughout as do the generic functions.
> Maybe x86 ought to be consistent here.
> I doubt negative bit numbers are expected to work?
I don't have a strong opinion on this, but the corresponding Intel
instructions do accept 64-bit operands.

> Did you try telling gcc that a big buffer (250MB is the limit for 32bit)
> from the pointer might be changed?
Yes, I did, see
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1966993.html
This still isn't a silver bullet, e.g. I saw an example where touching
a function parameter cast to a big buffer in the assembly resulted in
clobbering a global.
Moreover, one can imagine a situation where such a trick may be harmful, e.g.:

void foo(int size) {
  struct arr {
    long val[1U<<28];
  };
  long *bitmap = malloc(size);
  asm("#do something" : "+m"(*(struct arr*)bitmap));
  if (size < 1024)
    process(bitmap[size]);
}

If a (smart enough) compiler knows that malloc(size) returns a pointer
to |size| bytes in memory, it may assume that |size| is at least
1U<<28 (because otherwise it's incorrect to treat |bitmap| as a
pointer to a big array) and delete the size check.
This is of course a synthetic example, but not a completely impossible one.

> That ought to be softer than a full 'memory' clobber as it should
> only affect memory that could be accessed through the pointer.
>
> ....
> > -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> > +#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
> > +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
> >
> > -#define ADDR                         BITOP_ADDR(addr)
> > +#define ADDR                         RLONG_ADDR(addr)
>
> Is it worth just killing ADDR ?
> (as a different patch)
Agreed.
>         David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)



-- 
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Managing Directors: Paul Manicle, Halimah DeLaine Prado
Court of Registration and Number: Hamburg, HRB 86891
Registered Office: Hamburg


* Re: [PATCH v2] x86/asm: fix assembly constraints in bitops
  2019-04-02 12:35   ` Alexander Potapenko
@ 2019-04-02 12:37     ` Alexander Potapenko
  0 siblings, 0 replies; 10+ messages in thread
From: Alexander Potapenko @ 2019-04-02 12:37 UTC (permalink / raw)
  To: David Laight
  Cc: paulmck, hpa, peterz, linux-kernel, dvyukov, jyknight, x86, mingo

On Tue, Apr 2, 2019 at 2:35 PM Alexander Potapenko <glider@google.com> wrote:
>
> On Tue, Apr 2, 2019 at 1:44 PM David Laight <David.Laight@aculab.com> wrote:
> >
> > From: Alexander Potapenko
> > > Sent: 02 April 2019 12:28
> > >
> > > 1. Use memory clobber in bitops that touch arbitrary memory
> > >
> > > Certain bit operations that read/write bits take a base pointer and an
> > > arbitrarily large offset to address the bit relative to that base.
> >
> > Although x86_64 can use a signed 64bit bit number, looking at arm and arm64
> > they use 'int nr' throughout as do the generic functions.
> > Maybe x86 ought to be consistent here.
> > I doubt negative bit numbers are expected to work?
> I don't have a strong opinion on this, but the corresponding Intel
> instructions do accept 64-bit operands.
>
> > Did you try telling gcc that a big buffer (250MB is the limit for 32bit)
> > from the pointer might be changed?
> Yes, I did, see
> https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1966993.html
> This still isn't a silver bullet, e.g. I saw an example where touching
> a function parameter cast to a big buffer in the assembly resulted in
> clobbering a global.
> Moreover, one can imagine a situation where such a trick may be harmful, e.g.:
>
> void foo(int size) {
>   struct arr {
>     long val[1U<<28];
>   };
>   long *bitmap = malloc(size);
>   asm("#do something" : "+m"(*(struct arr*)bitmap));
>   if (size < 1024)
>     process(bitmap[size]);
(let it be bitmap[size-2] so that we don't overflow the buffer)
> }
>
> If a (smart enough) compiler knows that malloc(size) returns a pointer
> to |size| bytes in memory, it may assume that |size| is at least
> 1U<<28 (because otherwise it's incorrect to treat |bitmap| as a
> pointer to a big array) and delete the size check.
> This is of course a synthetic example, but not a completely impossible one.
>
> > That ought to be softer than a full 'memory' clobber as it should
> > only affect memory that could be accessed through the pointer.
> >
> > ....
> > > -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> > > +#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
> > > +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
> > >
> > > -#define ADDR                         BITOP_ADDR(addr)
> > > +#define ADDR                         RLONG_ADDR(addr)
> >
> > Is it worth just killing ADDR ?
> > (as a different patch)
> Agreed.
> >         David
> >
> > -
> > Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> > Registration No: 1397386 (Wales)
>
>
>
> --
> Alexander Potapenko
> Software Engineer
>
> Google Germany GmbH
> Erika-Mann-Straße, 33
> 80636 München
>
> Managing Directors: Paul Manicle, Halimah DeLaine Prado
> Court of Registration and Number: Hamburg, HRB 86891
> Registered Office: Hamburg



-- 
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Managing Directors: Paul Manicle, Halimah DeLaine Prado
Court of Registration and Number: Hamburg, HRB 86891
Registered Office: Hamburg


* Re: [PATCH v2] x86/asm: fix assembly constraints in bitops
  2019-04-02 11:28 [PATCH v2] x86/asm: fix assembly constraints in bitops Alexander Potapenko
  2019-04-02 11:33 ` Alexander Potapenko
  2019-04-02 11:45 ` David Laight
@ 2019-04-05  9:39 ` Ingo Molnar
  2019-04-05 11:12   ` David Laight
  2019-04-05 11:53   ` Alexander Potapenko
  2019-04-06  8:46 ` [tip:x86/urgent] x86/asm: Use stricter " tip-bot for Alexander Potapenko
  3 siblings, 2 replies; 10+ messages in thread
From: Ingo Molnar @ 2019-04-05  9:39 UTC (permalink / raw)
  To: Alexander Potapenko
  Cc: paulmck, hpa, peterz, linux-kernel, dvyukov, jyknight, x86,
	mingo, Peter Zijlstra, Linus Torvalds, Borislav Petkov


* Alexander Potapenko <glider@google.com> wrote:

> 1. Use memory clobber in bitops that touch arbitrary memory
> 
> Certain bit operations that read/write bits take a base pointer and an
> arbitrarily large offset to address the bit relative to that base.
> Inline assembly constraints aren't expressive enough to tell the
> compiler that the assembly directive is going to touch a specific memory
> location of unknown size, therefore we have to use the "memory" clobber
> to indicate that the assembly is going to access memory locations other
> than those listed in the inputs/outputs.
> To indicate that BTR/BTS instructions don't necessarily touch the first
> sizeof(long) bytes of the argument, we also move the address to assembly
> inputs.
> 
> This particular change leads to a size increase in 124 kernel functions in
> a defconfig build. For some of them the difference is only in NOP operations;
> others end up re-reading values from memory and may potentially slow down
> execution. But without these clobbers the compiler is free to cache
> the contents of the bitmaps and use them as if they weren't changed by
> the inline assembly.
> 
> 2. Use byte-sized arguments for operations touching single bytes.
> 
> Passing a long value to ANDB/ORB/XORB instructions makes the compiler
> treat sizeof(long) bytes as being clobbered, which isn't the case. This
> may theoretically lead to worse code in the case of heavy optimization.
> 
> Signed-off-by: Alexander Potapenko <glider@google.com>
> Cc: Dmitry Vyukov <dvyukov@google.com>
> Cc: Paul E. McKenney <paulmck@linux.ibm.com>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: James Y Knight <jyknight@google.com>
> ---
>  v2:
>   -- renamed the patch
>   -- addressed comment by Peter Zijlstra: don't use "+m" for functions
>   returning void
>   -- fixed input types for operations touching single bytes
> ---
>  arch/x86/include/asm/bitops.h | 41 +++++++++++++++--------------------
>  1 file changed, 18 insertions(+), 23 deletions(-)

I'm wondering what the primary motivation for the patch is:

 - Does it fix an actual miscompilation, or only a theoretical miscompilation?

 - If it fixes an existing miscompilation:

   - Does it fix a miscompilation triggered by current/future versions of GCC?
   - Does it fix a miscompilation triggered by current/future versions of Clang?

 - Also, is the miscompilation triggered by 'usual' kernel configs, or 
   does it require exotics such as weird debug options or GCC plugins, 
   etc?

I.e. a bit more context would be useful.

Thanks,

	Ingo


* RE: [PATCH v2] x86/asm: fix assembly constraints in bitops
  2019-04-05  9:39 ` Ingo Molnar
@ 2019-04-05 11:12   ` David Laight
  2019-04-05 11:53   ` Alexander Potapenko
  1 sibling, 0 replies; 10+ messages in thread
From: David Laight @ 2019-04-05 11:12 UTC (permalink / raw)
  To: 'Ingo Molnar', Alexander Potapenko
  Cc: paulmck, hpa, peterz, linux-kernel, dvyukov, jyknight, x86,
	mingo, Peter Zijlstra, Linus Torvalds, Borislav Petkov

From: Ingo Molnar
> Sent: 05 April 2019 10:40
> 
> * Alexander Potapenko <glider@google.com> wrote:
> 
> > 1. Use memory clobber in bitops that touch arbitrary memory
> >
> > Certain bit operations that read/write bits take a base pointer and an
> > arbitrarily large offset to address the bit relative to that base.
> > Inline assembly constraints aren't expressive enough to tell the
> > compiler that the assembly directive is going to touch a specific memory
> > location of unknown size, therefore we have to use the "memory" clobber
> > to indicate that the assembly is going to access memory locations other
> > than those listed in the inputs/outputs.
> > To indicate that BTR/BTS instructions don't necessarily touch the first
> > sizeof(long) bytes of the argument, we also move the address to assembly
> > inputs.
> >
> > This particular change leads to a size increase in 124 kernel functions in
> > a defconfig build. For some of them the difference is only in NOP operations;
> > others end up re-reading values from memory and may potentially slow down
> > execution. But without these clobbers the compiler is free to cache
> > the contents of the bitmaps and use them as if they weren't changed by
> > the inline assembly.
> >
> > 2. Use byte-sized arguments for operations touching single bytes.
> >
> > Passing a long value to ANDB/ORB/XORB instructions makes the compiler
> > treat sizeof(long) bytes as being clobbered, which isn't the case. This
> > may theoretically lead to worse code in the case of heavy optimization.
> >
...
> 
> I'm wondering what the primary motivation for the patch is:
> 
>  - Does it fix an actual miscompilation, or only a theoretical miscompilation?
> 
>  - If it fixes an existing miscompilation:
> 
>    - Does it fix a miscompilation triggered by current/future versions of GCC?
>    - Does it fix a miscompilation triggered by current/future versions of Clang?
> 
>  - Also, is the miscompilation triggered by 'usual' kernel configs, or
>    does it require exotics such as weird debug options or GCC plugins,
>    etc?
> 
> I.e. a bit more context would be useful.

The missing memory clobber (change 1) can cause bugs that are very
difficult to debug.
Simple things like gcc deciding to inline a function can change the order
of memory accesses.
Having the wrong constraints just isn't worth the trouble they can cause.
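
To illustrate (a sketch with invented names, not taken from the kernel):
without the "memory" clobber an unrelated store may legally be moved
across the asm once everything is inlined into one function:

int payload;	/* some other object the asm is not declared to touch */

static inline void set_bit_no_clobber(long nr, volatile unsigned long *flags)
{
	/* Only *flags is declared as written; no "memory" clobber. */
	asm volatile("btsq %1,%0"
		     : "+m" (*(volatile long *) flags)
		     : "r" (nr));
}

void publish(volatile unsigned long *flags)
{
	payload = 42;			/* may be reordered past ...        */
	set_bit_no_clobber(0, flags);	/* ... the asm, which is only said
					 * to touch *flags */
}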

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)



* Re: [PATCH v2] x86/asm: fix assembly constraints in bitops
  2019-04-05  9:39 ` Ingo Molnar
  2019-04-05 11:12   ` David Laight
@ 2019-04-05 11:53   ` Alexander Potapenko
  2019-04-06  8:20     ` Ingo Molnar
  1 sibling, 1 reply; 10+ messages in thread
From: Alexander Potapenko @ 2019-04-05 11:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Paul McKenney, H. Peter Anvin, Peter Zijlstra, LKML,
	Dmitriy Vyukov, James Y Knight, x86, Ingo Molnar, Peter Zijlstra,
	Linus Torvalds, Borislav Petkov

On Fri, Apr 5, 2019 at 11:39 AM Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Alexander Potapenko <glider@google.com> wrote:
>
> > 1. Use memory clobber in bitops that touch arbitrary memory
> >
> > Certain bit operations that read/write bits take a base pointer and an
> > arbitrarily large offset to address the bit relative to that base.
> > Inline assembly constraints aren't expressive enough to tell the
> > compiler that the assembly directive is going to touch a specific memory
> > location of unknown size, therefore we have to use the "memory" clobber
> > to indicate that the assembly is going to access memory locations other
> > than those listed in the inputs/outputs.
> > To indicate that BTR/BTS instructions don't necessarily touch the first
> > sizeof(long) bytes of the argument, we also move the address to assembly
> > inputs.
> >
> > This particular change leads to a size increase in 124 kernel functions in
> > a defconfig build. For some of them the difference is only in NOP operations;
> > others end up re-reading values from memory and may potentially slow down
> > execution. But without these clobbers the compiler is free to cache
> > the contents of the bitmaps and use them as if they weren't changed by
> > the inline assembly.
> >
> > 2. Use byte-sized arguments for operations touching single bytes.
> >
> > Passing a long value to ANDB/ORB/XORB instructions makes the compiler
> > treat sizeof(long) bytes as being clobbered, which isn't the case. This
> > may theoretically lead to worse code in the case of heavy optimization.
> >
> > Signed-off-by: Alexander Potapenko <glider@google.com>
> > Cc: Dmitry Vyukov <dvyukov@google.com>
> > Cc: Paul E. McKenney <paulmck@linux.ibm.com>
> > Cc: H. Peter Anvin <hpa@zytor.com>
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Cc: James Y Knight <jyknight@google.com>
> > ---
> >  v2:
> >   -- renamed the patch
> >   -- addressed comment by Peter Zijlstra: don't use "+m" for functions
> >   returning void
> >   -- fixed input types for operations touching single bytes
> > ---
> >  arch/x86/include/asm/bitops.h | 41 +++++++++++++++--------------------
> >  1 file changed, 18 insertions(+), 23 deletions(-)
>
> I'm wondering what the primary motivation for the patch is:
>
>  - Does it fix an actual miscompilation, or only a theoretical miscompilation?
That depends on what we call an actual miscompilation.
I've built a defconfig kernel and looked through some of the functions
generated by GCC 7.3.0 with and without this clobber, and didn't spot
any miscompilations.
However there is a (trivial) theoretical case where this code leads to
miscompilation: https://lkml.org/lkml/2019/3/28/393 using just GCC
8.3.0 with -O2.
It isn't hard to imagine someone writing such a function in the kernel someday.

So the primary motivation is to fix an existing misuse of the asm
directive, which happens to work in certain configurations now, but
isn't guaranteed to work under different circumstances.

>  - If it fixes an existing miscompilation:
>
>    - Does it fix a miscompilation triggered by current/future versions of GCC?
>    - Does it fix a miscompilation triggered by current/future versions of Clang?
>
>  - Also, is the miscompilation triggered by 'usual' kernel configs, or
>    does it require exotics such as weird debug options or GCC plugins,
>    etc?
>
> I.e. a bit more context would be useful.
>
> Thanks,
>
>         Ingo



-- 
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Managing Directors: Paul Manicle, Halimah DeLaine Prado
Court of Registration and Number: Hamburg, HRB 86891
Registered Office: Hamburg


* Re: [PATCH v2] x86/asm: fix assembly constraints in bitops
  2019-04-05 11:53   ` Alexander Potapenko
@ 2019-04-06  8:20     ` Ingo Molnar
  0 siblings, 0 replies; 10+ messages in thread
From: Ingo Molnar @ 2019-04-06  8:20 UTC (permalink / raw)
  To: Alexander Potapenko
  Cc: Paul McKenney, H. Peter Anvin, Peter Zijlstra, LKML,
	Dmitriy Vyukov, James Y Knight, x86, Ingo Molnar, Peter Zijlstra,
	Linus Torvalds, Borislav Petkov, Andy Lutomirski


* Alexander Potapenko <glider@google.com> wrote:

> On Fri, Apr 5, 2019 at 11:39 AM Ingo Molnar <mingo@kernel.org> wrote:
> >
> >
> > * Alexander Potapenko <glider@google.com> wrote:
> >
> > > 1. Use memory clobber in bitops that touch arbitrary memory
> > >
> > > Certain bit operations that read/write bits take a base pointer and an
> > > arbitrarily large offset to address the bit relative to that base.
> > > Inline assembly constraints aren't expressive enough to tell the
> > > compiler that the assembly directive is going to touch a specific memory
> > > location of unknown size, therefore we have to use the "memory" clobber
> > > to indicate that the assembly is going to access memory locations other
> > > than those listed in the inputs/outputs.
> > > To indicate that BTR/BTS instructions don't necessarily touch the first
> > > sizeof(long) bytes of the argument, we also move the address to assembly
> > > inputs.
> > >
> > > This particular change leads to a size increase in 124 kernel functions in
> > > a defconfig build. For some of them the difference is only in NOP operations;
> > > others end up re-reading values from memory and may potentially slow down
> > > execution. But without these clobbers the compiler is free to cache
> > > the contents of the bitmaps and use them as if they weren't changed by
> > > the inline assembly.
> > >
> > > 2. Use byte-sized arguments for operations touching single bytes.
> > >
> > > Passing a long value to ANDB/ORB/XORB instructions makes the compiler
> > > treat sizeof(long) bytes as being clobbered, which isn't the case. This
> > > may theoretically lead to worse code in the case of heavy optimization.
> > >
> > > Signed-off-by: Alexander Potapenko <glider@google.com>
> > > Cc: Dmitry Vyukov <dvyukov@google.com>
> > > Cc: Paul E. McKenney <paulmck@linux.ibm.com>
> > > Cc: H. Peter Anvin <hpa@zytor.com>
> > > Cc: Peter Zijlstra <peterz@infradead.org>
> > > Cc: James Y Knight <jyknight@google.com>
> > > ---
> > >  v2:
> > >   -- renamed the patch
> > >   -- addressed comment by Peter Zijlstra: don't use "+m" for functions
> > >   returning void
> > >   -- fixed input types for operations touching single bytes
> > > ---
> > >  arch/x86/include/asm/bitops.h | 41 +++++++++++++++--------------------
> > >  1 file changed, 18 insertions(+), 23 deletions(-)
> >
> > I'm wondering what the primary motivation for the patch is:
> >
> >  - Does it fix an actual miscompilation, or only a theoretical miscompilation?
> That depends on what we call an actual miscompilation.
> I've built a defconfig kernel and looked through some of the functions
> generated by GCC 7.3.0 with and without this clobber, and didn't spot
> any miscompilations.
> However there is a (trivial) theoretical case where this code leads to
> miscompilation: https://lkml.org/lkml/2019/3/28/393 using just GCC
> 8.3.0 with -O2.
> It isn't hard to imagine someone writing such a function in the kernel someday.
> 
> So the primary motivation is to fix an existing misuse of the asm
> directive, which happens to work in certain configurations now, but
> isn't guaranteed to work under different circumstances.

Thanks, that's all the context this patch needed: the miscompilation is 
real, demonstrated, and the pattern of your testcase doesn't look overly 
weird.

Also the 'cost' side of your patch appears to be pretty low: the 
defconfig-64 bloat from the stricter asm() constraints is very small and 
not measurable in .text section size with bog-standard GCC 7.3:

  text       data     bss      dec        hex      filename        sha1
  19565909   4974784  1826888  26367581   192565d  vmlinux.before  07f91fa36d2b477b245c7fee283dd3b7
  19565909   4974784  1826888  26367581   192565d  vmlinux.after   51a3f9a5fec4c29198953c06672a61a5

The allmodconfig-64 .text bloat appears to be zero as well:

  text       data     bss      dec        hex      filename        sha1
  27058207   34467402 30863436 92389045   581beb5  vmlinux.before  b38c470330f38779ab0be08fd7d90053
  27058207   34467402 30863436 92389045   581beb5  vmlinux.after   36c99be7cbebc13899ae22ced32fa2ec

Note that defconfig/allmodconfig is only a fraction of the kernel's 
complexity, especially if we consider the myriad build-time options 
in the Kconfig space plus the compiler variants out there.

Anyway, I agree that this needs fixing, so I have queued up your 
constraint fixes for x86/urgent, with a -stable tag.

We'll probably not push it to Linus until next week (-rc5 time), to make 
sure there are no surprises, and also to allow for any late review 
feedback.

Thanks,

	Ingo


* [tip:x86/urgent] x86/asm: Use stricter assembly constraints in bitops
  2019-04-02 11:28 [PATCH v2] x86/asm: fix assembly constraints in bitops Alexander Potapenko
                   ` (2 preceding siblings ...)
  2019-04-05  9:39 ` Ingo Molnar
@ 2019-04-06  8:46 ` tip-bot for Alexander Potapenko
  3 siblings, 0 replies; 10+ messages in thread
From: tip-bot for Alexander Potapenko @ 2019-04-06  8:46 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, jyknight, tglx, hpa, peterz, dvlasenk, linux-kernel,
	paulmck, stable, bp, torvalds, dvyukov, glider, brgerst, luto

Commit-ID:  5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03
Gitweb:     https://git.kernel.org/tip/5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03
Author:     Alexander Potapenko <glider@google.com>
AuthorDate: Tue, 2 Apr 2019 13:28:13 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 6 Apr 2019 09:52:02 +0200

x86/asm: Use stricter assembly constraints in bitops

There's a number of problems with how arch/x86/include/asm/bitops.h
is currently using assembly constraints for the memory region
bitops are modifying:

1) Use memory clobber in bitops that touch arbitrary memory

Certain bit operations that read/write bits take a base pointer and an
arbitrarily large offset to address the bit relative to that base.
Inline assembly constraints aren't expressive enough to tell the
compiler that the assembly directive is going to touch a specific memory
location of unknown size, therefore we have to use the "memory" clobber
to indicate that the assembly is going to access memory locations other
than those listed in the inputs/outputs.

To indicate that BTR/BTS instructions don't necessarily touch the first
sizeof(long) bytes of the argument, we also move the address to assembly
inputs.

This particular change leads to a size increase in 124 kernel functions in
a defconfig build. For some of them the difference is only in NOP operations;
others end up re-reading values from memory and may potentially slow down
execution. But without these clobbers the compiler is free to cache
the contents of the bitmaps and use them as if they weren't changed by
the inline assembly.

2) Use byte-sized arguments for operations touching single bytes.

Passing a long value to ANDB/ORB/XORB instructions makes the compiler
treat sizeof(long) bytes as being clobbered, which isn't the case. This
may theoretically lead to worse code in the case of heavy optimization.

Practical impact:

I've built a defconfig kernel and looked through some of the functions
generated by GCC 7.3.0 with and without this clobber, and didn't spot
any miscompilations.

However there is a (trivial) theoretical case where this code leads to
miscompilation:

  https://lkml.org/lkml/2019/3/28/393

using just GCC 8.3.0 with -O2.  It isn't hard to imagine someone writing
such a function in the kernel someday.

So the primary motivation is to fix an existing misuse of the asm
directive, which happens to work in certain configurations now, but
isn't guaranteed to work under different circumstances.

[ --mingo: Added -stable tag because defconfig only builds a fraction
  of the kernel and the trivial testcase looks normal enough to
  be used in existing or in-development code. ]

Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: <stable@vger.kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: James Y Knight <jyknight@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20190402112813.193378-1-glider@google.com
[ Edited the changelog, tidied up one of the defines. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/bitops.h | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index d153d570bb04..8e790ec219a5 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,16 +36,17 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define RLONG_ADDR(x)			 "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x)			"+m" (*(volatile char *) (x))
 
-#define ADDR				BITOP_ADDR(addr)
+#define ADDR				RLONG_ADDR(addr)
 
 /*
  * We do the locked ops that don't return the old value as
  * a mask operation on a byte.
  */
 #define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr)	WBYTE_ADDR((void *)(addr) + ((nr)>>3))
 #define CONST_MASK(nr)			(1 << ((nr) & 7))
 
 /**
@@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
 			: "memory");
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
-			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+	asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 /**
@@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
 			: "iq" ((u8)~CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
-			: BITOP_ADDR(addr)
-			: "Ir" (nr));
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
 
 static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+	asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
 	bool negative;
 	asm volatile(LOCK_PREFIX "andb %2,%1"
 		CC_SET(s)
-		: CC_OUT(s) (negative), ADDR
+		: CC_OUT(s) (negative), WBYTE_ADDR(addr)
 		: "ir" ((char) ~(1 << nr)) : "memory");
 	return negative;
 }
@@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
  * __clear_bit() is non-atomic and implies release semantics before the memory
  * operation. It can be used for an unlock if no other CPUs can concurrently
  * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
  */
 static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
-	barrier();
 	__clear_bit(nr, addr);
 }
 
@@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
  */
 static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+	asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }
 
 /**
@@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
 			: "iq" ((u8)CONST_MASK(nr)));
 	} else {
 		asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
-			: BITOP_ADDR(addr)
-			: "Ir" (nr));
+			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 	}
 }
 
@@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
 
 	asm(__ASM_SIZE(bts) " %2,%1"
 	    CC_SET(c)
-	    : CC_OUT(c) (oldbit), ADDR
-	    : "Ir" (nr));
+	    : CC_OUT(c) (oldbit)
+	    : ADDR, "Ir" (nr) : "memory");
 	return oldbit;
 }
 
@@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
 
 	asm volatile(__ASM_SIZE(btr) " %2,%1"
 		     CC_SET(c)
-		     : CC_OUT(c) (oldbit), ADDR
-		     : "Ir" (nr));
+		     : CC_OUT(c) (oldbit)
+		     : ADDR, "Ir" (nr) : "memory");
 	return oldbit;
 }
 
@@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
 
 	asm volatile(__ASM_SIZE(btc) " %2,%1"
 		     CC_SET(c)
-		     : CC_OUT(c) (oldbit), ADDR
-		     : "Ir" (nr) : "memory");
+		     : CC_OUT(c) (oldbit)
+		     : ADDR, "Ir" (nr) : "memory");
 
 	return oldbit;
 }
@@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
 	asm volatile(__ASM_SIZE(bt) " %2,%1"
 		     CC_SET(c)
 		     : CC_OUT(c) (oldbit)
-		     : "m" (*(unsigned long *)addr), "Ir" (nr));
+		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
 
 	return oldbit;
 }


end of thread, other threads:[~2019-04-06  8:46 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-02 11:28 [PATCH v2] x86/asm: fix assembly constraints in bitops Alexander Potapenko
2019-04-02 11:33 ` Alexander Potapenko
2019-04-02 11:45 ` David Laight
2019-04-02 12:35   ` Alexander Potapenko
2019-04-02 12:37     ` Alexander Potapenko
2019-04-05  9:39 ` Ingo Molnar
2019-04-05 11:12   ` David Laight
2019-04-05 11:53   ` Alexander Potapenko
2019-04-06  8:20     ` Ingo Molnar
2019-04-06  8:46 ` [tip:x86/urgent] x86/asm: Use stricter " tip-bot for Alexander Potapenko
