* [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro
@ 2019-08-13 9:59 Christophe Leroy
2019-08-13 9:59 ` [PATCH 2/2] powerpc/32: replace LOAD_MSR_KERNEL() by LOAD_REG_IMMEDIATE() Christophe Leroy
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Christophe Leroy @ 2019-08-13 9:59 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, segher
Cc: linuxppc-dev, linux-kernel
Today LOAD_REG_IMMEDIATE() is a basic #define which loads all
parts on a value into a register, including the parts that are NUL.
This means always 2 instructions on PPC32 and always 5 instructions
on PPC64. And those instructions cannot run in parallele as they are
updating the same register.
Ex: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in:
3c 20 00 00 lis r1,0
60 21 00 00 ori r1,r1,0
78 21 07 c6 rldicr r1,r1,32,31
64 21 00 00 oris r1,r1,0
60 21 40 00 ori r1,r1,16384
Rewrite LOAD_REG_IMMEDIATE() with GAS macro in order to skip
the parts that are NUL.
Rename existing LOAD_REG_IMMEDIATE() as LOAD_REG_IMMEDIATE_SYM()
and use that one for loading value of symbols which are not known
at compile time.
Now LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) in head_64.S results in:
38 20 40 00 li r1,16384
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/include/asm/ppc_asm.h | 42 +++++++++++++++++++++++++++++++-----
arch/powerpc/kernel/exceptions-64e.S | 10 ++++-----
arch/powerpc/kernel/head_64.S | 2 +-
3 files changed, 43 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index e0637730a8e7..9a7c2ca9b714 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -311,13 +311,43 @@ GLUE(.,name):
addis reg,reg,(name - 0b)@ha; \
addi reg,reg,(name - 0b)@l;
-#ifdef __powerpc64__
-#ifdef HAVE_AS_ATHIGH
+#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
#define __AS_ATHIGH high
#else
#define __AS_ATHIGH h
#endif
-#define LOAD_REG_IMMEDIATE(reg,expr) \
+
+.macro __LOAD_REG_IMMEDIATE_32 r, x
+ .if (\x) >= 0x8000 || (\x) < -0x8000
+ lis \r, (\x)@__AS_ATHIGH
+ .if (\x) & 0xffff != 0
+ ori \r, \r, (\x)@l
+ .endif
+ .else
+ li \r, (\x)@l
+ .endif
+.endm
+
+.macro __LOAD_REG_IMMEDIATE r, x
+ .if \x & ~0xffffffff != 0
+ __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+ rldicr \r, \r, 32, 31
+ .if (\x) & 0xffff0000 != 0
+ oris \r, \r, (\x)@__AS_ATHIGH
+ .endif
+ .if (\x) & 0xffff != 0
+ oris \r, \r, (\x)@l
+ .endif
+ .else
+ __LOAD_REG_IMMEDIATE_32 \r, \x
+ .endif
+.endm
+
+#ifdef __powerpc64__
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@highest; \
ori reg,reg,(expr)@higher; \
rldicr reg,reg,32,31; \
@@ -335,11 +365,13 @@ GLUE(.,name):
#else /* 32-bit */
-#define LOAD_REG_IMMEDIATE(reg,expr) \
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@ha; \
addi reg,reg,(expr)@l;
-#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE(reg, name)
+#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE_SYM(reg, name)
#define LOAD_REG_ADDRBASE(reg, name) lis reg,name@ha
#define ADDROFF(name) name@l
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..898aae6da167 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -751,8 +751,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+ LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
@@ -821,8 +821,8 @@ kernel_dbg_exc:
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
+ LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
@@ -1449,7 +1449,7 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r3,1f)
+ LOAD_REG_IMMEDIATE_SYM(r3,1f)
mtctr r3
bctr
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 91d297e696dd..1fd44761e997 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -635,7 +635,7 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+ LOAD_REG_IMMEDIATE_SYM(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
--
2.13.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] powerpc/32: replace LOAD_MSR_KERNEL() by LOAD_REG_IMMEDIATE()
2019-08-13 9:59 [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro Christophe Leroy
@ 2019-08-13 9:59 ` Christophe Leroy
2019-08-14 2:08 ` [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro Paul Mackerras
2019-08-14 20:10 ` Segher Boessenkool
2 siblings, 0 replies; 5+ messages in thread
From: Christophe Leroy @ 2019-08-13 9:59 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, segher
Cc: linuxppc-dev, linux-kernel
LOAD_MSR_KERNEL() and LOAD_REG_IMMEDIATE() are doing the same thing
in the same way. Drop LOAD_MSR_KERNEL()
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/entry_32.S | 18 +++++++++---------
arch/powerpc/kernel/head_32.h | 21 ++++-----------------
2 files changed, 13 insertions(+), 26 deletions(-)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 54fab22c9a43..972b05504a0a 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -230,7 +230,7 @@ transfer_to_handler_cont:
*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
- LOAD_MSR_KERNEL(r0, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
SYNC
@@ -304,7 +304,7 @@ stack_ovf:
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@@ -324,7 +324,7 @@ trace_syscall_entry_irq_off:
bl trace_hardirqs_on
/* Now enable for real */
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr r10
REST_GPR(0, r1)
@@ -394,7 +394,7 @@ ret_from_syscall:
#endif
mr r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
@@ -824,7 +824,7 @@ ret_from_except:
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
@@ -991,7 +991,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
@@ -1066,7 +1066,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@@ -1236,7 +1236,7 @@ recheck:
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
lwz r9,TI_FLAGS(r2)
@@ -1329,7 +1329,7 @@ _GLOBAL(enter_rtas)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
- LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..8abc7783dbe5 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -5,19 +5,6 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
/*
- * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
- */
-.macro __LOAD_MSR_KERNEL r, x
-.if \x >= 0x8000
- lis \r, (\x)@h
- ori \r, \r, (\x)@l
-.else
- li \r, (\x)
-.endif
-.endm
-#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
-
-/*
* Exception entry code. This code runs with address translation
* turned off, i.e. using physical addresses.
* We assume sprg3 has the physical address of the current
@@ -92,7 +79,7 @@
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
MTMSRD(r10) /* (except for mach check in rtas) */
#endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -140,10 +127,10 @@
* otherwise we might risk taking an interrupt before we tell lockdep
* they are enabled.
*/
- LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
rlwimi r10, r9, 0, MSR_EE
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
#endif
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -187,7 +174,7 @@
#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
- LOAD_MSR_KERNEL(r10, msr); \
+ LOAD_REG_IMMEDIATE(r10, msr); \
bl tfer; \
.long hdlr; \
.long ret
--
2.13.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro
2019-08-13 9:59 [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro Christophe Leroy
2019-08-13 9:59 ` [PATCH 2/2] powerpc/32: replace LOAD_MSR_KERNEL() by LOAD_REG_IMMEDIATE() Christophe Leroy
@ 2019-08-14 2:08 ` Paul Mackerras
2019-08-14 9:46 ` Christophe Leroy
2019-08-14 20:10 ` Segher Boessenkool
2 siblings, 1 reply; 5+ messages in thread
From: Paul Mackerras @ 2019-08-14 2:08 UTC (permalink / raw)
To: Christophe Leroy; +Cc: linuxppc-dev, linux-kernel
On Tue, Aug 13, 2019 at 09:59:35AM +0000, Christophe Leroy wrote:
[snip]
> +.macro __LOAD_REG_IMMEDIATE r, x
> + .if \x & ~0xffffffff != 0
> + __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
> + rldicr \r, \r, 32, 31
> + .if (\x) & 0xffff0000 != 0
> + oris \r, \r, (\x)@__AS_ATHIGH
> + .endif
> + .if (\x) & 0xffff != 0
> + oris \r, \r, (\x)@l
> + .endif
> + .else
> + __LOAD_REG_IMMEDIATE_32 \r, \x
> + .endif
> +.endm
Doesn't this force all negative constants, even small ones, to use
the long sequence? For example, __LOAD_REG_IMMEDIATE r3, -1 will
generate (as far as I can see):
li r3, -1
rldicr r3, r3, 32, 31
oris r3, r3, 0xffff
ori r3, r3, 0xffff
which seems suboptimal.
Paul.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro
2019-08-14 2:08 ` [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro Paul Mackerras
@ 2019-08-14 9:46 ` Christophe Leroy
0 siblings, 0 replies; 5+ messages in thread
From: Christophe Leroy @ 2019-08-14 9:46 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linuxppc-dev, linux-kernel
Le 14/08/2019 à 04:08, Paul Mackerras a écrit :
> On Tue, Aug 13, 2019 at 09:59:35AM +0000, Christophe Leroy wrote:
>
> [snip]
>
>> +.macro __LOAD_REG_IMMEDIATE r, x
>> + .if \x & ~0xffffffff != 0
>> + __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
>> + rldicr \r, \r, 32, 31
>> + .if (\x) & 0xffff0000 != 0
>> + oris \r, \r, (\x)@__AS_ATHIGH
>> + .endif
>> + .if (\x) & 0xffff != 0
>> + oris \r, \r, (\x)@l
>> + .endif
>> + .else
>> + __LOAD_REG_IMMEDIATE_32 \r, \x
>> + .endif
>> +.endm
>
> Doesn't this force all negative constants, even small ones, to use
> the long sequence? For example, __LOAD_REG_IMMEDIATE r3, -1 will
> generate (as far as I can see):
>
> li r3, -1
> rldicr r3, r3, 32, 31
> oris r3, r3, 0xffff
> ori r3, r3, 0xffff
>
> which seems suboptimal.
Ah yes, thanks. And it is also buggy when \x is over 0x80000000 because
lis is a signed ops
I'll send v2
Christophe
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro
2019-08-13 9:59 [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro Christophe Leroy
2019-08-13 9:59 ` [PATCH 2/2] powerpc/32: replace LOAD_MSR_KERNEL() by LOAD_REG_IMMEDIATE() Christophe Leroy
2019-08-14 2:08 ` [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro Paul Mackerras
@ 2019-08-14 20:10 ` Segher Boessenkool
2 siblings, 0 replies; 5+ messages in thread
From: Segher Boessenkool @ 2019-08-14 20:10 UTC (permalink / raw)
To: Christophe Leroy; +Cc: linuxppc-dev, Paul Mackerras, linux-kernel
Hi Christophe,
On Tue, Aug 13, 2019 at 09:59:35AM +0000, Christophe Leroy wrote:
> + rldicr \r, \r, 32, 31
Could you please write this as
sldi \r, \r, 32
? It's much easier to read, imo (it's the exact same instruction).
You can do a lot cheaper sequences if you have a temporary reg, as well
(longest path of 3 insns instead of 5):
lis rt,A
ori rt,B
lis rd,C
ori rd,D
rldimi rd,rt,32,0
to load ABCD.
Segher
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2019-08-14 20:17 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-13 9:59 [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro Christophe Leroy
2019-08-13 9:59 ` [PATCH 2/2] powerpc/32: replace LOAD_MSR_KERNEL() by LOAD_REG_IMMEDIATE() Christophe Leroy
2019-08-14 2:08 ` [PATCH 1/2] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro Paul Mackerras
2019-08-14 9:46 ` Christophe Leroy
2019-08-14 20:10 ` Segher Boessenkool
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).