linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86: modernize sync_bitops.h
@ 2018-06-25 10:24 Jan Beulich
  2018-06-26  7:18 ` Ingo Molnar
       [not found] ` <5B30C2C302000000000FA99B@prv1-mh.provo.novell.com>
  0 siblings, 2 replies; 12+ messages in thread
From: Jan Beulich @ 2018-06-25 10:24 UTC (permalink / raw)
  To: mingo, tglx, hpa; +Cc: linux-kernel

Add missing insn suffixes and use rmwcc.h just like was (more or less)
recently done for bitops.h as well.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
 arch/x86/include/asm/sync_bitops.h |   34 ++++++++++++----------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

--- 4.18-rc2/arch/x86/include/asm/sync_bitops.h
+++ 4.18-rc2-x86-sync-bitops-insn-suffixes/arch/x86/include/asm/sync_bitops.h
@@ -14,6 +14,8 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
+#include <asm/rmwcc.h>
+
 #define ADDR (*(volatile long *)addr)
 
 /**
@@ -29,7 +31,7 @@
  */
 static inline void sync_set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; bts %1,%0"
+	asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr,
  */
 static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btr %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -64,7 +66,7 @@ static inline void sync_clear_bit(long n
  */
 static inline void sync_change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btc %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -78,14 +80,10 @@ static inline void sync_change_bit(long
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
+static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; bts %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts),
+	                 *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -98,12 +96,8 @@ static inline int sync_test_and_set_bit(
  */
 static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btr %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr),
+	                 *addr, "Ir", nr, "%0", c);
 }
 
 /**
@@ -116,12 +110,8 @@ static inline int sync_test_and_clear_bi
  */
 static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btc %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc),
+	                 *addr, "Ir", nr, "%0", c);
 }
 
 #define sync_test_bit(nr, addr) test_bit(nr, addr)





^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] x86: modernize sync_bitops.h
  2018-06-25 10:24 [PATCH] x86: modernize sync_bitops.h Jan Beulich
@ 2018-06-26  7:18 ` Ingo Molnar
  2018-06-26  7:26   ` Jan Beulich
       [not found] ` <5B30C2C302000000000FA99B@prv1-mh.provo.novell.com>
  1 sibling, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2018-06-26  7:18 UTC (permalink / raw)
  To: Jan Beulich; +Cc: mingo, tglx, hpa, linux-kernel


* Jan Beulich <JBeulich@suse.com> wrote:

> Add missing insn suffixes and use rmwcc.h just like was (more or less)
> recently done for bitops.h as well.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
>  arch/x86/include/asm/sync_bitops.h |   34 ++++++++++++----------------------
>  1 file changed, 12 insertions(+), 22 deletions(-)

Have you verified that a typical x86 vmlinux (with defconfig for example) is the 
same before/after?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] x86: modernize sync_bitops.h
  2018-06-26  7:18 ` Ingo Molnar
@ 2018-06-26  7:26   ` Jan Beulich
  0 siblings, 0 replies; 12+ messages in thread
From: Jan Beulich @ 2018-06-26  7:26 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: mingo, tglx, linux-kernel, hpa

>>> On 26.06.18 at 09:18, <mingo@kernel.org> wrote:
> * Jan Beulich <JBeulich@suse.com> wrote:
> 
>> Add missing insn suffixes and use rmwcc.h just like was (more or less)
>> recently done for bitops.h as well.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>>  arch/x86/include/asm/sync_bitops.h |   34 ++++++++++++----------------------
>>  1 file changed, 12 insertions(+), 22 deletions(-)
> 
> Have you verified that a typical x86 vmlinux (with defconfig for example) is the 
> same before/after?

It works the same, but the binary is unlikely to be the same with the switch
to condition code asm() outputs.

Jan



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] x86: modernize sync_bitops.h
       [not found]   ` <5B30C2C302000078001FE5A0@prv1-mh.provo.novell.com>
@ 2018-11-21 10:11     ` Jan Beulich
  2018-11-21 11:55       ` David Laight
       [not found]     ` <5BF52F4D02000000001006ED@prv1-mh.provo.novell.com>
  1 sibling, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2018-11-21 10:11 UTC (permalink / raw)
  To: mingo, tglx, hpa; +Cc: Boris Ostrovsky, Juergen Gross, linux-kernel

Add missing insn suffixes and use rmwcc.h just like was (more or less)
recently done for bitops.h as well.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base over rmwcc.h changes.
---
 arch/x86/include/asm/sync_bitops.h |   31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

--- 4.20-rc3/arch/x86/include/asm/sync_bitops.h
+++ 4.20-rc3-x86-sync-bitops-insn-suffixes/arch/x86/include/asm/sync_bitops.h
@@ -14,6 +14,8 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
+#include <asm/rmwcc.h>
+
 #define ADDR (*(volatile long *)addr)
 
 /**
@@ -29,7 +31,7 @@
  */
 static inline void sync_set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; bts %1,%0"
+	asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr,
  */
 static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btr %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -64,7 +66,7 @@ static inline void sync_clear_bit(long n
  */
 static inline void sync_change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btc %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -78,14 +80,9 @@ static inline void sync_change_bit(long
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
+static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; bts %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr);
 }
 
 /**
@@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(
  */
 static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btr %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr);
 }
 
 /**
@@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bi
  */
 static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btc %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr);
 }
 
 #define sync_test_bit(nr, addr) test_bit(nr, addr)




^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] x86: modernize sync_bitops.h
  2018-11-21 10:11     ` [PATCH v2] " Jan Beulich
@ 2018-11-21 11:55       ` David Laight
  2018-11-21 13:02         ` Jan Beulich
  0 siblings, 1 reply; 12+ messages in thread
From: David Laight @ 2018-11-21 11:55 UTC (permalink / raw)
  To: 'Jan Beulich', mingo, tglx, hpa
  Cc: Boris Ostrovsky, Juergen Gross, linux-kernel

From: Jan Beulich
> Sent: 21 November 2018 10:11
> 
> Add missing insn suffixes and use rmwcc.h just like was (more or less)
> recently done for bitops.h as well.

Why? bts (etc) on memory don't really have an 'operand size'.
IIRC the suffix determines the width of the %cx register that selects the bit number.
I'd have to look up what the "Ir" constraint means.

IIRC the i386 book just said that the cpu might (aka will) read/write 4 bytes
surrounding the required byte.
More recent books say it does an aligned 4 byte read/write.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] x86: modernize sync_bitops.h
  2018-11-21 11:55       ` David Laight
@ 2018-11-21 13:02         ` Jan Beulich
  2018-11-21 13:49           ` David Laight
  0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2018-11-21 13:02 UTC (permalink / raw)
  To: David Laight
  Cc: mingo, tglx, Boris Ostrovsky, Juergen Gross, linux-kernel, hpa

>>> On 21.11.18 at 12:55, <David.Laight@ACULAB.COM> wrote:
> From: Jan Beulich
>> Sent: 21 November 2018 10:11
>> 
>> Add missing insn suffixes and use rmwcc.h just like was (more or less)
>> recently done for bitops.h as well.
> 
> Why? bts (etc) on memory don't really have an 'operand size'.

Of course they do - depending on operand size they operate on
2-, 4-, or 8-byte quantities. When the second operand is a
register, the suffix is redundant (but doesn't hurt), but when
the second operand is an immediate, the assembler (in AT&T
syntax) has no way of knowing what operand size you mean.

Jan



^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] x86: modernize sync_bitops.h
  2018-11-21 13:02         ` Jan Beulich
@ 2018-11-21 13:49           ` David Laight
  2018-11-21 14:41             ` Jan Beulich
  0 siblings, 1 reply; 12+ messages in thread
From: David Laight @ 2018-11-21 13:49 UTC (permalink / raw)
  To: 'Jan Beulich'
  Cc: mingo, tglx, Boris Ostrovsky, Juergen Gross, linux-kernel, hpa

From: Jan Beulich
> Sent: 21 November 2018 13:03
> 
> >>> On 21.11.18 at 12:55, <David.Laight@ACULAB.COM> wrote:
> > From: Jan Beulich
> >> Sent: 21 November 2018 10:11
> >>
> >> Add missing insn suffixes and use rmwcc.h just like was (more or less)
> >> recently done for bitops.h as well.
> >
> > Why? bts (etc) on memory don't really have an 'operand size'.
> 
> Of course they do - depending on operand size they operate on
> 2-, 4-, or 8-byte quantities. When the second operand is a
> register, the suffix is redundant (but doesn't hurt), but when
> the second operand is an immediate, the assembler (in AT&T
> syntax) has no way of knowing what operand size you mean.

You need to RTFM.

Regardless of the 'operand size' the 'bit' instructions do a 32 bit aligned
32 bit wide read/modify/write cycle.

The 'operand size' does affect whether the bit number (which is signed)
comes from %cl (8 bits), %cx (16 bits), %rcx (32 bits) or (%ecx) 64 bits.
But that is implicit in the register name used.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] x86: modernize sync_bitops.h
  2018-11-21 13:49           ` David Laight
@ 2018-11-21 14:41             ` Jan Beulich
  2018-11-21 15:53               ` David Laight
  0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2018-11-21 14:41 UTC (permalink / raw)
  To: David Laight
  Cc: mingo, tglx, Boris Ostrovsky, Juergen Gross, linux-kernel, hpa

>>> On 21.11.18 at 14:49, <David.Laight@ACULAB.COM> wrote:
> From: Jan Beulich
>> Sent: 21 November 2018 13:03
>> 
>> >>> On 21.11.18 at 12:55, <David.Laight@ACULAB.COM> wrote:
>> > From: Jan Beulich
>> >> Sent: 21 November 2018 10:11
>> >>
>> >> Add missing insn suffixes and use rmwcc.h just like was (more or less)
>> >> recently done for bitops.h as well.
>> >
>> > Why? bts (etc) on memory don't really have an 'operand size'.
>> 
>> Of course they do - depending on operand size they operate on
>> 2-, 4-, or 8-byte quantities. When the second operand is a
>> register, the suffix is redundant (but doesn't hurt), but when
>> the second operand is an immediate, the assembler (in AT&T
>> syntax) has no way of knowing what operand size you mean.
> 
> You need to RTFM.

Excuse me? How about you look at this table from the SDM
(format of course comes out better in the .pdf):

0F AB /r BTS r/m16, r16 MR Valid Valid Store selected bit in CF flag and set.
0F AB /r BTS r/m32, r32 MR Valid Valid Store selected bit in CF flag and set.
REX.W + 0F AB /r BTS r/m64, r64 MR Valid N.E. Store selected bit in CF flag and set.
0F BA /5 ib BTS r/m16, imm8 MI Valid Valid Store selected bit in CF flag and set.
0F BA /5 ib BTS r/m32, imm8 MI Valid Valid Store selected bit in CF flag and set.
REX.W + 0F BA /5 ib BTS r/m64, imm8 MI Valid N.E. Store selected bit in CF flag and set.

Please read manuals yourself before making such statements.

> Regardless of the 'operand size' the 'bit' instructions do a 32 bit aligned
> 32 bit wide read/modify/write cycle.
> 
> The 'operand size' does affect whether the bit number (which is signed)
> comes from %cl (8 bits), %cx (16 bits), %rcx (32 bits) or (%ecx) 64 bits.
> But that is implicit in the register name used.

There is no form with %cl as operand. Instead there are forms with
an immediate operand. 

Jan



^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] x86: modernize sync_bitops.h
  2018-11-21 14:41             ` Jan Beulich
@ 2018-11-21 15:53               ` David Laight
  2018-11-27 19:51                 ` Sean Christopherson
  0 siblings, 1 reply; 12+ messages in thread
From: David Laight @ 2018-11-21 15:53 UTC (permalink / raw)
  To: 'Jan Beulich'
  Cc: mingo, tglx, Boris Ostrovsky, Juergen Gross, linux-kernel, hpa



> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 21 November 2018 14:42
> To: David Laight
> Cc: mingo@elte.hu; tglx@linutronix.de; Boris Ostrovsky; Juergen Gross; linux-kernel@vger.kernel.org;
> hpa@zytor.com
> Subject: RE: [PATCH v2] x86: modernize sync_bitops.h
> 
> >>> On 21.11.18 at 14:49, <David.Laight@ACULAB.COM> wrote:
> > From: Jan Beulich
> >> Sent: 21 November 2018 13:03
> >>
> >> >>> On 21.11.18 at 12:55, <David.Laight@ACULAB.COM> wrote:
> >> > From: Jan Beulich
> >> >> Sent: 21 November 2018 10:11
> >> >>
> >> >> Add missing insn suffixes and use rmwcc.h just like was (more or less)
> >> >> recently done for bitops.h as well.
> >> >
> >> > Why? bts (etc) on memory don't really have an 'operand size'.
> >>
> >> Of course they do - depending on operand size they operate on
> >> 2-, 4-, or 8-byte quantities. When the second operand is a
> >> register, the suffix is redundant (but doesn't hurt), but when
> >> the second operand is an immediate, the assembler (in AT&T
> >> syntax) has no way of knowing what operand size you mean.
> >
> > You need to RTFM.
> 
> Excuse me? How about you look at this table from the SDM
> (format of course comes out better in the .pdf):
> 
> 0F AB /r BTS r/m16, r16 MR Valid Valid Store selected bit in CF flag and set.
> 0F AB /r BTS r/m32, r32 MR Valid Valid Store selected bit in CF flag and set.
> REX.W + 0F AB /r BTS r/m64, r64 MR Valid N.E. Store selected bit in CF flag and set.
> 0F BA /5 ib BTS r/m16, imm8 MI Valid Valid Store selected bit in CF flag and set.
> 0F BA /5 ib BTS r/m32, imm8 MI Valid Valid Store selected bit in CF flag and set.
> REX.W + 0F BA /5 ib BTS r/m64, imm8 MI Valid N.E. Store selected bit in CF flag and set.
> 
> Please read manuals yourself before making such statements.
> 
> > Regardless of the 'operand size' the 'bit' instructions do a 32 bit aligned
> > 32 bit wide read/modify/write cycle.
> >
> > The 'operand size' does affect whether the bit number (which is signed)
> > comes from %cl (8 bits), %cx (16 bits), %rcx (32 bits) or (%ecx) 64 bits.
> > But that is implicit in the register name used.
> 
> There is no form with %cl as operand. Instead there are forms with
> an immediate operand.

See also section 3.1.1.9 (in the copy of 325462.pdf I have):
(edited out references to the figures)

Bit(BitBase, BitOffset) — Returns the value of a bit within a bit string.
   The bit string is a sequence of bits in memory or a register. Bits are
   numbered from low-order to high-order within registers and within memory
   bytes. If the BitBase is a register, the BitOffset can be in the range 0
   to [15, 31, 63] depending on the mode and register size.

   If BitBase is a memory address, the BitOffset can range has different ranges
   depending on the operand size (see Table 3-2).

   The addressed bit is numbered (Offset MOD 8) within the byte at address
   (BitBase + (BitOffset DIV 8)) where DIV is signed division with rounding
   towards negative infinity and MOD returns a positive number.

Table 3-2. Range of Bit Positions Specified by Bit Offset Operands
Operand Size Immediate BitOffset   Register BitOffset
    16             0 to 15         −2**15 to 2**15 − 1
    32             0 to 31         −2**31 to 2**31 − 1
    64             0 to 63         −2**63 to 2**63 − 1

For Bit test it says:

When accessing a bit in memory, the processor may access 4 bytes starting from
the memory address for a 32-bit operand size, using by the following relationship:
    Effective Address + (4 ∗ (BitOffset DIV 32))
Or, it may access 2 bytes starting from the memory address for a 16-bit operand, using this relationship:
    Effective Address + (2 ∗ (BitOffset DIV 16))
It may do so even when only a single byte needs to be accessed to reach the given bit.

(That is the text I remember being in the old docs, but I don't think it
used to refer to the address being aligned.
My 8086, 286 and 386 books are all at home.)

The other instructions (eg btc) say:

If the bit base operand specifies a memory location, the operand represents the
address of the byte in memory that contains the bit base (bit 0 of the specified byte)
of the bit string. The range of the bit position that can be
referenced by the offset operand depends on the operand size.

So the 'operand size' gives the size of the bit offset, not the size of
the memory cycle.

Maybe I'll run some instructions against a PCIe slave and monitor the TLPs.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] x86: modernize sync_bitops.h
  2018-11-21 15:53               ` David Laight
@ 2018-11-27 19:51                 ` Sean Christopherson
  0 siblings, 0 replies; 12+ messages in thread
From: Sean Christopherson @ 2018-11-27 19:51 UTC (permalink / raw)
  To: David Laight
  Cc: 'Jan Beulich',
	mingo, tglx, Boris Ostrovsky, Juergen Gross, linux-kernel, hpa

On Wed, Nov 21, 2018 at 03:53:05PM +0000, David Laight wrote:
> 
> 
> > -----Original Message-----
> > From: Jan Beulich [mailto:JBeulich@suse.com]
> > Sent: 21 November 2018 14:42
> > To: David Laight
> > Cc: mingo@elte.hu; tglx@linutronix.de; Boris Ostrovsky; Juergen Gross; linux-kernel@vger.kernel.org;
> > hpa@zytor.com
> > Subject: RE: [PATCH v2] x86: modernize sync_bitops.h
> > 
> > >>> On 21.11.18 at 14:49, <David.Laight@ACULAB.COM> wrote:
> > > From: Jan Beulich
> > >> Sent: 21 November 2018 13:03
> > >>
> > >> >>> On 21.11.18 at 12:55, <David.Laight@ACULAB.COM> wrote:
> > >> > From: Jan Beulich
> > >> >> Sent: 21 November 2018 10:11
> > >> >>
> > >> >> Add missing insn suffixes and use rmwcc.h just like was (more or less)
> > >> >> recently done for bitops.h as well.
> > >> >
> > >> > Why? bts (etc) on memory don't really have an 'operand size'.
> > >>
> > >> Of course they do - depending on operand size they operate on
> > >> 2-, 4-, or 8-byte quantities. When the second operand is a
> > >> register, the suffix is redundant (but doesn't hurt), but when
> > >> the second operand is an immediate, the assembler (in AT&T
> > >> syntax) has no way of knowing what operand size you mean.
> > >
> > > You need to RTFM.
> > 
> > Excuse me? How about you look at this table from the SDM
> > (format of course comes out better in the .pdf):
> > 
> > 0F AB /r BTS r/m16, r16 MR Valid Valid Store selected bit in CF flag and set.
> > 0F AB /r BTS r/m32, r32 MR Valid Valid Store selected bit in CF flag and set.
> > REX.W + 0F AB /r BTS r/m64, r64 MR Valid N.E. Store selected bit in CF flag and set.
> > 0F BA /5 ib BTS r/m16, imm8 MI Valid Valid Store selected bit in CF flag and set.
> > 0F BA /5 ib BTS r/m32, imm8 MI Valid Valid Store selected bit in CF flag and set.
> > REX.W + 0F BA /5 ib BTS r/m64, imm8 MI Valid N.E. Store selected bit in CF flag and set.
> > 
> > Please read manuals yourself before making such statements.
> > 
> > > Regardless of the 'operand size' the 'bit' instructions do a 32 bit aligned
> > > 32 bit wide read/modify/write cycle.
> > >
> > > The 'operand size' does affect whether the bit number (which is signed)
> > > comes from %cl (8 bits), %cx (16 bits), %rcx (32 bits) or (%ecx) 64 bits.
> > > But that is implicit in the register name used.
> > 
> > There is no form with %cl as operand. Instead there are forms with
> > an immediate operand.
> 
> See also section 3.1.1.9 (in the copy of 325462.pdf I have):
> (edited out references to the figures)
> 
> Bit(BitBase, BitOffset) — Returns the value of a bit within a bit string.
>    The bit string is a sequence of bits in memory or a register. Bits are
>    numbered from low-order to high-order within registers and within memory
>    bytes. If the BitBase is a register, the BitOffset can be in the range 0
>    to [15, 31, 63] depending on the mode and register size.
> 
>    If BitBase is a memory address, the BitOffset can range has different ranges
>    depending on the operand size (see Table 3-2).
> 
>    The addressed bit is numbered (Offset MOD 8) within the byte at address
>    (BitBase + (BitOffset DIV 8)) where DIV is signed division with rounding
>    towards negative infinity and MOD returns a positive number.
> 
> Table 3-2. Range of Bit Positions Specified by Bit Offset Operands
> Operand Size Immediate BitOffset   Register BitOffset
>     16             0 to 15         −2**15 to 2**15 − 1
>     32             0 to 31         −2**31 to 2**31 − 1
>     64             0 to 63         −2**63 to 2**63 − 1
> 
> For Bit test it says:
> 
> When accessing a bit in memory, the processor may access 4 bytes starting from
> the memory address for a 32-bit operand size, using by the following relationship:
>     Effective Address + (4 ∗ (BitOffset DIV 32))
> Or, it may access 2 bytes starting from the memory address for a 16-bit operand, using this relationship:
>     Effective Address + (2 ∗ (BitOffset DIV 16))
> It may do so even when only a single byte needs to be accessed to reach the given bit.

This agrees with what Jan is saying, e.g. "4 bytes ... for a 32-bit op size"
and "2 bytes ... for a 16-bit op size".  For whatever reason the 64-bit op
size is omitted, but the normal behavior applies.

> 
> (That is the text I remember being in the old docs, but I don't think it
> used to refer to the address being aligned.
> My 8086, 286 and 386 books are all at home.)
> 
> The other instructions (eg btc) say:
> 
> If the bit base operand specifies a memory location, the operand represents the
> address of the byte in memory that contains the bit base (bit 0 of the specified byte)
> of the bit string. The range of the bit position that can be
> referenced by the offset operand depends on the operand size.
> 
> So the 'operand size' gives the size of the bit offset, not the size of
> the memory cycle.

No, it determines both.  Here's rough pseudocode for how the op size
affects what memory is read.  Note that there isn't alignment per se,
but rather the memory accessed is offset by a modulo calculation of
the bit number, i.e. by the bits "above" the op_size.  This doesn't
apply to the imm8 encoding because bits above the op_size are ignored.

	mem = decode_mem(op1);

	if (is_reg(op2)) {
		bit = read_reg(op2);

		/* sign-extend and align the offset */
		offset = sign_extend(bit, op_size);
		if (op_size == 2)
			offset &= ~0x1;
		else if (op_size == 4)
			offset &= ~0x3;
		else if (op_size == 8)
			offset &= ~0x7;

		/* memory displacement adjusted by offset */
		mem.displacement += offset;
	} else {
		bit = decode_imm(op2);
	}

	/* imm8 ignores bits above op_size, reg adjusts offset (above) */
	if (op_size == 2)
		bit &= 0xf;
	else if (op_size == 4)
		bit &= 0x1f;
	else if (op_size == 8)
		bit &= 0x3f;

	tmp = read_mem(mem, op_size);
	tmp = bit_op(tmp, bit);
	write_mem(mem, op_size);

> Maybe I'll run some instructions against a PCIe slave and monitor the TLPs.
> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2 RESEND] x86: modernize sync_bitops.h
       [not found]       ` <5BF52F4D020000780022227E@prv1-mh.provo.novell.com>
@ 2019-03-27 15:15         ` Jan Beulich
  2019-04-10  8:47           ` [tip:x86/asm] x86/asm: Modernize sync_bitops.h tip-bot for Jan Beulich
  0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2019-03-27 15:15 UTC (permalink / raw)
  To: mingo, tglx, hpa; +Cc: Boris Ostrovsky, Juergen Gross, linux-kernel

Add missing insn suffixes and use rmwcc.h just like was (more or less)
recently done for bitops.h as well.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base over rmwcc.h changes.
---
 arch/x86/include/asm/sync_bitops.h |   31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -14,6 +14,8 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
+#include <asm/rmwcc.h>
+
 #define ADDR (*(volatile long *)addr)
 
 /**
@@ -29,7 +31,7 @@
  */
 static inline void sync_set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; bts %1,%0"
+	asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr,
  */
 static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btr %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -64,7 +66,7 @@ static inline void sync_clear_bit(long n
  */
 static inline void sync_change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btc %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -78,14 +80,9 @@ static inline void sync_change_bit(long
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
+static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; bts %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr);
 }
 
 /**
@@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(
  */
 static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btr %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr);
 }
 
 /**
@@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bi
  */
 static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btc %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr);
 }
 
 #define sync_test_bit(nr, addr) test_bit(nr, addr)





^ permalink raw reply	[flat|nested] 12+ messages in thread

* [tip:x86/asm] x86/asm: Modernize sync_bitops.h
  2019-03-27 15:15         ` [PATCH v2 RESEND] " Jan Beulich
@ 2019-04-10  8:47           ` tip-bot for Jan Beulich
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Jan Beulich @ 2019-04-10  8:47 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: jbeulich, boris.ostrovsky, dvlasenk, tglx, torvalds, bp, hpa,
	mingo, brgerst, jgross, JBeulich, luto, linux-kernel, peterz

Commit-ID:  547571b5abe61bb33c6005d8981e86e3c61fedcc
Gitweb:     https://git.kernel.org/tip/547571b5abe61bb33c6005d8981e86e3c61fedcc
Author:     Jan Beulich <JBeulich@suse.com>
AuthorDate: Wed, 27 Mar 2019 09:15:19 -0600
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 10 Apr 2019 09:53:31 +0200

x86/asm: Modernize sync_bitops.h

Add missing instruction suffixes and use rmwcc.h just like was (more or less)
recently done for bitops.h as well, see:

  22636f8c9511: x86/asm: Add instruction suffixes to bitops
  288e4521f0f6: x86/asm: 'Simplify' GEN_*_RMWcc() macros

No change in functionality intended.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/5C9B93870200007800222289@prv1-mh.provo.novell.com
[ Cleaned up the changelog a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/sync_bitops.h | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 2fe745356fb1..6d8d6bc183b7 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -14,6 +14,8 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */
 
+#include <asm/rmwcc.h>
+
 #define ADDR (*(volatile long *)addr)
 
 /**
@@ -29,7 +31,7 @@
  */
 static inline void sync_set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; bts %1,%0"
+	asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btr %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -64,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline void sync_change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btc %1,%0"
+	asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -78,14 +80,9 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
+static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; bts %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr);
 }
 
 /**
@@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btr %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr);
 }
 
 /**
@@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	unsigned char oldbit;
-
-	asm volatile("lock; btc %2,%1\n\tsetc %0"
-		     : "=qm" (oldbit), "+m" (ADDR)
-		     : "Ir" (nr) : "memory");
-	return oldbit;
+	return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr);
 }
 
 #define sync_test_bit(nr, addr) test_bit(nr, addr)

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-04-10  8:48 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-25 10:24 [PATCH] x86: modernize sync_bitops.h Jan Beulich
2018-06-26  7:18 ` Ingo Molnar
2018-06-26  7:26   ` Jan Beulich
     [not found] ` <5B30C2C302000000000FA99B@prv1-mh.provo.novell.com>
     [not found]   ` <5B30C2C302000078001FE5A0@prv1-mh.provo.novell.com>
2018-11-21 10:11     ` [PATCH v2] " Jan Beulich
2018-11-21 11:55       ` David Laight
2018-11-21 13:02         ` Jan Beulich
2018-11-21 13:49           ` David Laight
2018-11-21 14:41             ` Jan Beulich
2018-11-21 15:53               ` David Laight
2018-11-27 19:51                 ` Sean Christopherson
     [not found]     ` <5BF52F4D02000000001006ED@prv1-mh.provo.novell.com>
     [not found]       ` <5BF52F4D020000780022227E@prv1-mh.provo.novell.com>
2019-03-27 15:15         ` [PATCH v2 RESEND] " Jan Beulich
2019-04-10  8:47           ` [tip:x86/asm] x86/asm: Modernize sync_bitops.h tip-bot for Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).