* [PATCH] x86: drop REG_OUT macro from hweight functions
@ 2019-07-28 11:51 Alexey Dobriyan
2019-07-29 9:43 ` Peter Zijlstra
0 siblings, 1 reply; 6+ messages in thread
From: Alexey Dobriyan @ 2019-07-28 11:51 UTC (permalink / raw)
To: tglx, mingo, bp, hpa; +Cc: x86, linux-kernel
Output register is always RAX/EAX.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/include/asm/arch_hweight.h | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -6,10 +6,8 @@
#ifdef CONFIG_64BIT
#define REG_IN "D"
-#define REG_OUT "a"
#else
#define REG_IN "a"
-#define REG_OUT "a"
#endif
static __always_inline unsigned int __arch_hweight32(unsigned int w)
@@ -17,7 +15,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
unsigned int res;
asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
+ : "=a" (res)
: REG_IN (w));
return res;
@@ -45,7 +43,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;
asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
+ : "=a" (res)
: REG_IN (w));
return res;
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: drop REG_OUT macro from hweight functions
2019-07-28 11:51 [PATCH] x86: drop REG_OUT macro from hweight functions Alexey Dobriyan
@ 2019-07-29 9:43 ` Peter Zijlstra
2019-07-29 10:04 ` Peter Zijlstra
0 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2019-07-29 9:43 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: tglx, mingo, bp, hpa, x86, linux-kernel
On Sun, Jul 28, 2019 at 02:51:40PM +0300, Alexey Dobriyan wrote:
> Output register is always RAX/EAX.
>
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
>
> arch/x86/include/asm/arch_hweight.h | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/include/asm/arch_hweight.h
> +++ b/arch/x86/include/asm/arch_hweight.h
> @@ -6,10 +6,8 @@
>
> #ifdef CONFIG_64BIT
> #define REG_IN "D"
> -#define REG_OUT "a"
> #else
> #define REG_IN "a"
> -#define REG_OUT "a"
> #endif
>
> static __always_inline unsigned int __arch_hweight32(unsigned int w)
> @@ -17,7 +15,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
> unsigned int res;
>
> asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> + : "=a" (res)
> : REG_IN (w));
>
> return res;
> @@ -45,7 +43,7 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
> unsigned long res;
>
> asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> + : "=a" (res)
> : REG_IN (w));
>
> return res;
I _think_ something like the below should also work:
(fwiw _ASM_ARG 5 and 6 are broken, as are all the QLWB variants)
---
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..88704612b2f7 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -3,22 +3,15 @@
#define _ASM_X86_HWEIGHT_H
#include <asm/cpufeatures.h>
-
-#ifdef CONFIG_64BIT
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
+#include <asm/asm.h>
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));
return res;
}
@@ -45,8 +38,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;
asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));
return res;
}
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: drop REG_OUT macro from hweight functions
2019-07-29 9:43 ` Peter Zijlstra
@ 2019-07-29 10:04 ` Peter Zijlstra
2019-07-29 20:44 ` Alexey Dobriyan
0 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2019-07-29 10:04 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: tglx, mingo, bp, hpa, x86, linux-kernel
On Mon, Jul 29, 2019 at 11:43:29AM +0200, Peter Zijlstra wrote:
> I _think_ something like the below should also work:
>
> (fwiw _ASM_ARG 5 and 6 are broken, as are all the QLWB variants)
Fixed that, because
> ---
> diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
> index ba88edd0d58b..88704612b2f7 100644
> --- a/arch/x86/include/asm/arch_hweight.h
> +++ b/arch/x86/include/asm/arch_hweight.h
> @@ -3,22 +3,15 @@
> #define _ASM_X86_HWEIGHT_H
>
> #include <asm/cpufeatures.h>
> -
> -#ifdef CONFIG_64BIT
> -#define REG_IN "D"
> -#define REG_OUT "a"
> -#else
> -#define REG_IN "a"
> -#define REG_OUT "a"
> -#endif
> +#include <asm/asm.h>
>
> static __always_inline unsigned int __arch_hweight32(unsigned int w)
> {
> unsigned int res;
>
> asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
> - : "="REG_OUT (res)
> - : REG_IN (w));
> + : "=a" (res)
> + : _ASM_ARG1 (w));
That needs _ASM_ARG1L because popcntl..
---
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..3cab7f382a43 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -3,22 +3,15 @@
#define _ASM_X86_HWEIGHT_H
#include <asm/cpufeatures.h>
-
-#ifdef CONFIG_64BIT
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
+#include <asm/asm.h>
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1L (w));
return res;
}
@@ -45,8 +38,8 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
unsigned long res;
asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ : "=a" (res)
+ : _ASM_ARG1 (w));
return res;
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ff577c0b102..0bb0bbcd4551 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -53,17 +53,17 @@
#define _ASM_ARG2 _ASM_DX
#define _ASM_ARG3 _ASM_CX
-#define _ASM_ARG1L eax
-#define _ASM_ARG2L edx
-#define _ASM_ARG3L ecx
+#define _ASM_ARG1L _ASM_ARG1
+#define _ASM_ARG2L _ASM_ARG2
+#define _ASM_ARG3L _ASM_ARG3
-#define _ASM_ARG1W ax
-#define _ASM_ARG2W dx
-#define _ASM_ARG3W cx
+#define _ASM_ARG1W __ASM_FORM_RAW(ax)
+#define _ASM_ARG2W __ASM_FORM_RAW(dx)
+#define _ASM_ARG3W __ASM_FORM_RAW(cx)
-#define _ASM_ARG1B al
-#define _ASM_ARG2B dl
-#define _ASM_ARG3B cl
+#define _ASM_ARG1B __ASM_FORM_RAW(al)
+#define _ASM_ARG2B __ASM_FORM_RAW(dl)
+#define _ASM_ARG3B __ASM_FORM_RAW(cl)
#else
/* 64 bit */
@@ -72,36 +72,29 @@
#define _ASM_ARG2 _ASM_SI
#define _ASM_ARG3 _ASM_DX
#define _ASM_ARG4 _ASM_CX
-#define _ASM_ARG5 r8
-#define _ASM_ARG6 r9
-
-#define _ASM_ARG1Q rdi
-#define _ASM_ARG2Q rsi
-#define _ASM_ARG3Q rdx
-#define _ASM_ARG4Q rcx
-#define _ASM_ARG5Q r8
-#define _ASM_ARG6Q r9
-
-#define _ASM_ARG1L edi
-#define _ASM_ARG2L esi
-#define _ASM_ARG3L edx
-#define _ASM_ARG4L ecx
-#define _ASM_ARG5L r8d
-#define _ASM_ARG6L r9d
-
-#define _ASM_ARG1W di
-#define _ASM_ARG2W si
-#define _ASM_ARG3W dx
-#define _ASM_ARG4W cx
-#define _ASM_ARG5W r8w
-#define _ASM_ARG6W r9w
-
-#define _ASM_ARG1B dil
-#define _ASM_ARG2B sil
-#define _ASM_ARG3B dl
-#define _ASM_ARG4B cl
-#define _ASM_ARG5B r8b
-#define _ASM_ARG6B r9b
+#define _ASM_ARG5 __ASM_REG(8)
+#define _ASM_ARG6 __ASM_REG(9)
+
+#define _ASM_ARG1L __ASM_FORM_RAW(edi)
+#define _ASM_ARG2L __ASM_FORM_RAW(esi)
+#define _ASM_ARG3L __ASM_FORM_RAW(edx)
+#define _ASM_ARG4L __ASM_FORM_RAW(ecx)
+#define _ASM_ARG5L __ASM_FORM_RAW(r8d)
+#define _ASM_ARG6L __ASM_FORM_RAW(r9d)
+
+#define _ASM_ARG1W __ASM_FORM_RAW(di)
+#define _ASM_ARG2W __ASM_FORM_RAW(si)
+#define _ASM_ARG3W __ASM_FORM_RAW(dx)
+#define _ASM_ARG4W __ASM_FORM_RAW(cx)
+#define _ASM_ARG5W __ASM_FORM_RAW(r8w)
+#define _ASM_ARG6W __ASM_FORM_RAW(r9w)
+
+#define _ASM_ARG1B __ASM_FORM_RAW(dil)
+#define _ASM_ARG2B __ASM_FORM_RAW(sil)
+#define _ASM_ARG3B __ASM_FORM_RAW(dl)
+#define _ASM_ARG4B __ASM_FORM_RAW(cl)
+#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
+#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
#endif
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: drop REG_OUT macro from hweight functions
2019-07-29 10:04 ` Peter Zijlstra
@ 2019-07-29 20:44 ` Alexey Dobriyan
2019-07-30 8:08 ` Peter Zijlstra
0 siblings, 1 reply; 6+ messages in thread
From: Alexey Dobriyan @ 2019-07-29 20:44 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: tglx, mingo, bp, hpa, x86, linux-kernel
On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
> +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
> +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
> +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
> +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
> +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
> +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
I preprocessed percpu code once to see what precisely it does because
it was easier than wading through forest of macroes.
Hopefully x86 assembly won't get to that level.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: drop REG_OUT macro from hweight functions
2019-07-29 20:44 ` Alexey Dobriyan
@ 2019-07-30 8:08 ` Peter Zijlstra
2019-07-30 13:42 ` hpa
0 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2019-07-30 8:08 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: tglx, mingo, bp, hpa, x86, linux-kernel
On Mon, Jul 29, 2019 at 11:44:17PM +0300, Alexey Dobriyan wrote:
> On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
> > +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
> > +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
> > +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
> > +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
> > +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
> > +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
>
> I preprocessed percpu code once to see what precisely it does because
> it was easier than wading through forest of macroes.
Per cpu is easy, try reading the tracepoint code ;-)
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: drop REG_OUT macro from hweight functions
2019-07-30 8:08 ` Peter Zijlstra
@ 2019-07-30 13:42 ` hpa
0 siblings, 0 replies; 6+ messages in thread
From: hpa @ 2019-07-30 13:42 UTC (permalink / raw)
To: Peter Zijlstra, Alexey Dobriyan; +Cc: tglx, mingo, bp, x86, linux-kernel
On July 30, 2019 1:08:43 AM PDT, Peter Zijlstra <peterz@infradead.org> wrote:
>On Mon, Jul 29, 2019 at 11:44:17PM +0300, Alexey Dobriyan wrote:
>> On Mon, Jul 29, 2019 at 12:04:47PM +0200, Peter Zijlstra wrote:
>> > +#define _ASM_ARG1B __ASM_FORM_RAW(dil)
>> > +#define _ASM_ARG2B __ASM_FORM_RAW(sil)
>> > +#define _ASM_ARG3B __ASM_FORM_RAW(dl)
>> > +#define _ASM_ARG4B __ASM_FORM_RAW(cl)
>> > +#define _ASM_ARG5B __ASM_FORM_RAW(r8b)
>> > +#define _ASM_ARG6B __ASM_FORM_RAW(r9b)
>>
>> I preprocessed percpu code once to see what precisely it does because
>> it was easier than wading through forest of macroes.
>
>Per cpu is easy, try reading the tracepoint code ;-)
Sometimes it's the .s file one ends up wanting to check out...
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-07-30 13:43 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-28 11:51 [PATCH] x86: drop REG_OUT macro from hweight functions Alexey Dobriyan
2019-07-29 9:43 ` Peter Zijlstra
2019-07-29 10:04 ` Peter Zijlstra
2019-07-29 20:44 ` Alexey Dobriyan
2019-07-30 8:08 ` Peter Zijlstra
2019-07-30 13:42 ` hpa
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).