x86: drop REG_OUT macro from hweight functions

Message ID 20190728115140.GA32463@avx2
State New
Series
  • x86: drop REG_OUT macro from hweight functions

Commit Message

Alexey Dobriyan July 28, 2019, 11:51 a.m. UTC
The output register is always RAX/EAX, so the REG_OUT macro is redundant.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 arch/x86/include/asm/arch_hweight.h |    6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
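
A minimal user-space sketch (not the kernel code) of the point: whichever side of the ALTERNATIVE runs, popcnt with its output forced into EAX by the "=a" constraint, or the __sw_hweight32() call returning its result in EAX per the calling convention, the value ends up in the same register, so a plain "=a" output constraint is enough on both 32-bit and 64-bit builds. Assumes an x86-64 toolchain and a POPCNT-capable CPU.

#include <stdio.h>

/* Input in %edi ("D"), output pinned to %eax ("=a"), mirroring the
 * 64-bit kernel constraints after the patch. */
static inline unsigned int hweight32_popcnt(unsigned int w)
{
	unsigned int res;

	asm ("popcntl %1, %0" : "=a" (res) : "D" (w));
	return res;
}

int main(void)
{
	printf("%u\n", hweight32_popcnt(0xdeadbeef));	/* prints 24 */
	return 0;
}

No -mpopcnt flag is needed since the instruction is spelled out explicitly; the CPU still has to support it at run time.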

Comments

Peter Zijlstra July 29, 2019, 9:43 a.m. UTC | #1
On Sun, Jul 28, 2019 at 02:51:40PM +0300, Alexey Dobriyan wrote:
> The output register is always RAX/EAX, so the REG_OUT macro is redundant.
> 
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
> 
>  arch/x86/include/asm/arch_hweight.h |    6 ++----
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> --- a/arch/x86/include/asm/arch_hweight.h
> +++ b/arch/x86/include/asm/arch_hweight.h
> @@ -6,10 +6,8 @@
>  
>  #ifdef CONFIG_64BIT
>  #define REG_IN "D"
> -#define REG_OUT "a"
>  #else
>  #define REG_IN "a"
> -#define REG_OUT "a"
>  #endif
>  
>  static __always_inline unsigned int __arch_hweight32(unsigned int w)
> @@ -17,7 +15,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
>  	unsigned int res;
>  
>  	asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
> -			 : "="REG_OUT (res)
> +			 : "=a" (res)
>  			 : REG_IN (w));
>  
>  	return res;
> @@ -45,7 +43,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
>  	unsigned long res;
>  
>  	asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
> -			 : "="REG_OUT (res)
> +			 : "=a" (res)
>  			 : REG_IN (w));
>  
>  	return res;

I _think_ something like the below should also work:

(fwiw _ASM_ARG5 and _ASM_ARG6 are broken, as are all the Q/L/W/B variants)

---
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..88704612b2f7 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -3,22 +3,15 @@
 #define _ASM_X86_HWEIGHT_H
 
 #include <asm/cpufeatures.h>
-
-#ifdef CONFIG_64BIT
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
+#include <asm/asm.h>
 
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res;
 
 	asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
-			 : "="REG_OUT (res)
-			 : REG_IN (w));
+			 : "=a" (res)
+			 : _ASM_ARG1 (w));
 
 	return res;
 }
@@ -45,8 +38,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
 	unsigned long res;
 
 	asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
-			 : "="REG_OUT (res)
-			 : REG_IN (w));
+			 : "=a" (res)
+			 : _ASM_ARG1 (w));
 
 	return res;
 }
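
On the "_ASM_ARG5 and _ASM_ARG6 are broken" remark: one way to see the problem is that, when these macros are consumed from C, they have to expand to string literals that can be concatenated into an asm() template, and a bare token such as r8 cannot do that; the __ASM_FORM_RAW()/__ASM_REG() wrapping in the follow-up below addresses exactly this. A tiny stand-alone sketch of the mechanism, with illustrative macro names rather than the real asm.h ones:

#include <stdio.h>

#define FORM_RAW(x)	#x		/* stringize; roughly what __ASM_FORM_RAW does on the C side */
#define ARG5		FORM_RAW(r8)	/* expands to the string "r8" */

int main(void)
{
	/* Adjacent string literals concatenate, so the register name can be
	 * spliced into an asm template; a bare r8 token could not appear here. */
	puts("mov %%" ARG5 ", %0");
	return 0;
}
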
Peter Zijlstra July 29, 2019, 10:04 a.m. UTC | #2
On Mon, Jul 29, 2019 at 11:43:29AM +0200, Peter Zijlstra wrote:

> I _think_ something like the below should also work:
> 
> (fwiw _ASM_ARG5 and _ASM_ARG6 are broken, as are all the Q/L/W/B variants)

Fixed that, because

> ---
> diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
> index ba88edd0d58b..88704612b2f7 100644
> --- a/arch/x86/include/asm/arch_hweight.h
> +++ b/arch/x86/include/asm/arch_hweight.h
> @@ -3,22 +3,15 @@
>  #define _ASM_X86_HWEIGHT_H
>  
>  #include <asm/cpufeatures.h>
> -
> -#ifdef CONFIG_64BIT
> -#define REG_IN "D"
> -#define REG_OUT "a"
> -#else
> -#define REG_IN "a"
> -#define REG_OUT "a"
> -#endif
> +#include <asm/asm.h>
>  
>  static __always_inline unsigned int __arch_hweight32(unsigned int w)
>  {
>  	unsigned int res;
>  
>  	asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
> -			 : "="REG_OUT (res)
> -			 : REG_IN (w));
> +			 : "=a" (res)
> +			 : _ASM_ARG1 (w));

That needs _ASM_ARG1L, because popcntl takes 32-bit operands.

---
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..3cab7f382a43 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -3,22 +3,15 @@
 #define _ASM_X86_HWEIGHT_H
 
 #include <asm/cpufeatures.h>
-
-#ifdef CONFIG_64BIT
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
+#include <asm/asm.h>
 
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res;
 
 	asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
-			 : "="REG_OUT (res)
-			 : REG_IN (w));
+			 : "=a" (res)
+			 : _ASM_ARG1L (w));
 
 	return res;
 }
@@ -45,8 +38,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
 	unsigned long res;
 
 	asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
-			 : "="REG_OUT (res)
-			 : REG_IN (w));
+			 : "=a" (res)
+			 : _ASM_ARG1 (w));
 
 	return res;
 }
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ff577c0b102..0bb0bbcd4551 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -53,17 +53,17 @@
 #define _ASM_ARG2	_ASM_DX
 #define _ASM_ARG3	_ASM_CX
 
-#define _ASM_ARG1L	eax
-#define _ASM_ARG2L	edx
-#define _ASM_ARG3L	ecx
+#define _ASM_ARG1L	_ASM_ARG1
+#define _ASM_ARG2L	_ASM_ARG2
+#define _ASM_ARG3L	_ASM_ARG3
 
-#define _ASM_ARG1W	ax
-#define _ASM_ARG2W	dx
-#define _ASM_ARG3W	cx
+#define _ASM_ARG1W	__ASM_FORM_RAW(ax)
+#define _ASM_ARG2W	__ASM_FORM_RAW(dx)
+#define _ASM_ARG3W	__ASM_FORM_RAW(cx)
 
-#define _ASM_ARG1B	al
-#define _ASM_ARG2B	dl
-#define _ASM_ARG3B	cl
+#define _ASM_ARG1B	__ASM_FORM_RAW(al)
+#define _ASM_ARG2B	__ASM_FORM_RAW(dl)
+#define _ASM_ARG3B	__ASM_FORM_RAW(cl)
 
 #else
 /* 64 bit */
@@ -72,36 +72,29 @@
 #define _ASM_ARG2	_ASM_SI
 #define _ASM_ARG3	_ASM_DX
 #define _ASM_ARG4	_ASM_CX
-#define _ASM_ARG5	r8
-#define _ASM_ARG6	r9
-
-#define _ASM_ARG1Q	rdi
-#define _ASM_ARG2Q	rsi
-#define _ASM_ARG3Q	rdx
-#define _ASM_ARG4Q	rcx
-#define _ASM_ARG5Q	r8
-#define _ASM_ARG6Q	r9
-
-#define _ASM_ARG1L	edi
-#define _ASM_ARG2L	esi
-#define _ASM_ARG3L	edx
-#define _ASM_ARG4L	ecx
-#define _ASM_ARG5L	r8d
-#define _ASM_ARG6L	r9d
-
-#define _ASM_ARG1W	di
-#define _ASM_ARG2W	si
-#define _ASM_ARG3W	dx
-#define _ASM_ARG4W	cx
-#define _ASM_ARG5W	r8w
-#define _ASM_ARG6W	r9w
-
-#define _ASM_ARG1B	dil
-#define _ASM_ARG2B	sil
-#define _ASM_ARG3B	dl
-#define _ASM_ARG4B	cl
-#define _ASM_ARG5B	r8b
-#define _ASM_ARG6B	r9b
+#define _ASM_ARG5	__ASM_REG(8)
+#define _ASM_ARG6	__ASM_REG(9)
+
+#define _ASM_ARG1L	__ASM_FORM_RAW(edi)
+#define _ASM_ARG2L	__ASM_FORM_RAW(esi)
+#define _ASM_ARG3L	__ASM_FORM_RAW(edx)
+#define _ASM_ARG4L	__ASM_FORM_RAW(ecx)
+#define _ASM_ARG5L	__ASM_FORM_RAW(r8d)
+#define _ASM_ARG6L	__ASM_FORM_RAW(r9d)
+
+#define _ASM_ARG1W	__ASM_FORM_RAW(di)
+#define _ASM_ARG2W	__ASM_FORM_RAW(si)
+#define _ASM_ARG3W	__ASM_FORM_RAW(dx)
+#define _ASM_ARG4W	__ASM_FORM_RAW(cx)
+#define _ASM_ARG5W	__ASM_FORM_RAW(r8w)
+#define _ASM_ARG6W	__ASM_FORM_RAW(r9w)
+
+#define _ASM_ARG1B	__ASM_FORM_RAW(dil)
+#define _ASM_ARG2B	__ASM_FORM_RAW(sil)
+#define _ASM_ARG3B	__ASM_FORM_RAW(dl)
+#define _ASM_ARG4B	__ASM_FORM_RAW(cl)
+#define _ASM_ARG5B	__ASM_FORM_RAW(r8b)
+#define _ASM_ARG6B	__ASM_FORM_RAW(r9b)
 
 #endif
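
A small user-space sketch (x86-64 only, POPCNT-capable CPU assumed) of the width point above: popcntl takes 32-bit registers and popcntq 64-bit ones, so whatever register name ends up in the template has to match the suffix (edi for popcntl, rdi for popcntq). The sketch below sidesteps the _ASM_ARG* text pasting and simply lets GCC pick the width from the operand types.

/* unsigned int operands make GCC substitute %edi/%eax, matching popcntl. */
static inline unsigned int pc32(unsigned int w)
{
	unsigned int res;

	asm ("popcntl %1, %0" : "=a" (res) : "D" (w));
	return res;
}

/* unsigned long operands make GCC substitute %rdi/%rax, matching popcntq. */
static inline unsigned long pc64(unsigned long w)
{
	unsigned long res;

	asm ("popcntq %1, %0" : "=a" (res) : "D" (w));
	return res;
}

int main(void)
{
	/* 0xf0f0f0f0 has 16 bits set, 0x8000000000000001 has 2. */
	return (pc32(0xf0f0f0f0u) == 16 && pc64(0x8000000000000001ul) == 2) ? 0 : 1;
}
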
Alexey Dobriyan July 29, 2019, 8:44 p.m. UTC | #3
On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
> +#define _ASM_ARG1B	__ASM_FORM_RAW(dil)
> +#define _ASM_ARG2B	__ASM_FORM_RAW(sil)
> +#define _ASM_ARG3B	__ASM_FORM_RAW(dl)
> +#define _ASM_ARG4B	__ASM_FORM_RAW(cl)
> +#define _ASM_ARG5B	__ASM_FORM_RAW(r8b)
> +#define _ASM_ARG6B	__ASM_FORM_RAW(r9b)

I preprocessed the percpu code once to see precisely what it does, because
it was easier than wading through a forest of macros.

Hopefully x86 assembly won't get to that level.
Peter Zijlstra July 30, 2019, 8:08 a.m. UTC | #4
On Mon, Jul 29, 2019 at 11:44:17PM +0300, Alexey Dobriyan wrote:
> On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
> > +#define _ASM_ARG1B	__ASM_FORM_RAW(dil)
> > +#define _ASM_ARG2B	__ASM_FORM_RAW(sil)
> > +#define _ASM_ARG3B	__ASM_FORM_RAW(dl)
> > +#define _ASM_ARG4B	__ASM_FORM_RAW(cl)
> > +#define _ASM_ARG5B	__ASM_FORM_RAW(r8b)
> > +#define _ASM_ARG6B	__ASM_FORM_RAW(r9b)
> 
> I preprocessed the percpu code once to see precisely what it does, because
> it was easier than wading through a forest of macros.

Per-cpu is easy; try reading the tracepoint code ;-)
H. Peter Anvin July 30, 2019, 1:42 p.m. UTC | #5
On July 30, 2019 1:08:43 AM PDT, Peter Zijlstra <peterz@infradead.org> wrote:
>On Mon, Jul 29, 2019 at 11:44:17PM +0300, Alexey Dobriyan wrote:
>> On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
>> > +#define _ASM_ARG1B	__ASM_FORM_RAW(dil)
>> > +#define _ASM_ARG2B	__ASM_FORM_RAW(sil)
>> > +#define _ASM_ARG3B	__ASM_FORM_RAW(dl)
>> > +#define _ASM_ARG4B	__ASM_FORM_RAW(cl)
>> > +#define _ASM_ARG5B	__ASM_FORM_RAW(r8b)
>> > +#define _ASM_ARG6B	__ASM_FORM_RAW(r9b)
>> 
>> I preprocessed the percpu code once to see precisely what it does, because
>> it was easier than wading through a forest of macros.
>
>Per-cpu is easy; try reading the tracepoint code ;-)

Sometimes it's the .s file one ends up wanting to check out...

Patch

--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -6,10 +6,8 @@ 
 
 #ifdef CONFIG_64BIT
 #define REG_IN "D"
-#define REG_OUT "a"
 #else
 #define REG_IN "a"
-#define REG_OUT "a"
 #endif
 
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
@@ -17,7 +15,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
 	unsigned int res;
 
 	asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
-			 : "="REG_OUT (res)
+			 : "=a" (res)
 			 : REG_IN (w));
 
 	return res;
@@ -45,7 +43,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
 	unsigned long res;
 
 	asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
-			 : "="REG_OUT (res)
+			 : "=a" (res)
 			 : REG_IN (w));
 
 	return res;