* [RFC][PATCH] powerpc/64s: stop using r14 register @ 2017-05-21 14:00 Nicholas Piggin 2017-05-21 22:09 ` Benjamin Herrenschmidt 2017-05-30 19:08 ` Naveen N. Rao 0 siblings, 2 replies; 7+ messages in thread From: Nicholas Piggin @ 2017-05-21 14:00 UTC (permalink / raw) To: linuxppc-dev; +Cc: Nicholas Piggin, Anton Blanchard I'd like to take over the r14 register for use as a per-cpu kernel register similar to the way r13 is used for the paca. r14, being the last non-volatile register gcc allocates, appears with about 0.5% of the frequency of r31 in (static) instructions. I haven't counted dynamically how many extra spills and fills that removing it causes, but I should. My guess is the memory ops saved by using it as a per-cpu variable will significantly outweigh the cost of losing it as a general use register. This part of the patch is pretty mechanical. A couple of places (prom) still have to use it, and I haven't quite understood the KVM code yet. Question is whether this approach seems okay, and whether we should do the same for 64e. 
Thanks, Nick --- arch/powerpc/Makefile | 1 + arch/powerpc/crypto/md5-asm.S | 40 +++---- arch/powerpc/crypto/sha1-powerpc-asm.S | 10 +- arch/powerpc/include/asm/ppc_asm.h | 21 +++- arch/powerpc/kernel/asm-offsets.c | 4 +- arch/powerpc/kernel/entry_32.S | 4 +- arch/powerpc/kernel/entry_64.S | 46 ++++---- arch/powerpc/kernel/exceptions-64s.S | 3 +- arch/powerpc/kernel/head_64.S | 8 +- arch/powerpc/kernel/idle_book3s.S | 88 +++++++------- arch/powerpc/kernel/process.c | 4 +- arch/powerpc/kernel/tm.S | 30 ++--- arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 4 +- arch/powerpc/kvm/book3s_hv_interrupts.S | 5 +- arch/powerpc/lib/checksum_64.S | 66 +++++------ arch/powerpc/lib/copypage_power7.S | 32 +++--- arch/powerpc/lib/copyuser_power7.S | 152 ++++++++++++------------- arch/powerpc/lib/crtsavres.S | 3 + arch/powerpc/lib/memcpy_power7.S | 80 ++++++------- arch/powerpc/net/bpf_jit32.h | 12 +- arch/powerpc/net/bpf_jit_asm.S | 4 +- 21 files changed, 321 insertions(+), 296 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index bc4791aecd03..4c3492851fab 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -137,6 +137,7 @@ endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) +CFLAGS-$(CONFIG_PPC64) += -ffixed-r13 -ffixed-r14 CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) ifeq ($(CONFIG_PPC_BOOK3S_64),y) diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S index 10cdf5bceebb..99e41af88e19 100644 --- a/arch/powerpc/crypto/md5-asm.S +++ b/arch/powerpc/crypto/md5-asm.S @@ -25,31 +25,31 @@ #define rW02 r10 #define rW03 r11 #define rW04 r12 -#define rW05 r14 -#define rW06 r15 -#define rW07 r16 -#define rW08 r17 -#define rW09 r18 -#define rW10 r19 -#define rW11 r20 -#define rW12 r21 -#define rW13 r22 -#define rW14 r23 -#define rW15 r24 - -#define rT0 r25 -#define rT1 r26 +#define rW05 r15 +#define 
rW06 r16 +#define rW07 r17 +#define rW08 r18 +#define rW09 r19 +#define rW10 r20 +#define rW11 r21 +#define rW12 r22 +#define rW13 r23 +#define rW14 r24 +#define rW15 r25 + +#define rT0 r26 +#define rT1 r27 #define INITIALIZE \ PPC_STLU r1,-INT_FRAME_SIZE(r1); \ - SAVE_8GPRS(14, r1); /* push registers onto stack */ \ - SAVE_4GPRS(22, r1); \ - SAVE_GPR(26, r1) + SAVE_8GPRS(15, r1); /* push registers onto stack */ \ + SAVE_4GPRS(23, r1); \ + SAVE_GPR(27, r1) #define FINALIZE \ - REST_8GPRS(14, r1); /* pop registers from stack */ \ - REST_4GPRS(22, r1); \ - REST_GPR(26, r1); \ + REST_8GPRS(15, r1); /* pop registers from stack */ \ + REST_4GPRS(23, r1); \ + REST_GPR(27, r1); \ addi r1,r1,INT_FRAME_SIZE; #ifdef __BIG_ENDIAN__ diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S index 82ddc9bdfeb1..56bc6ac942c6 100644 --- a/arch/powerpc/crypto/sha1-powerpc-asm.S +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S @@ -41,10 +41,10 @@ or r6,r6,r0; \ add r0,RE(t),r15; \ add RT(t),RT(t),r6; \ - add r14,r0,W(t); \ + add r6,r0,W(t); \ LWZ(W((t)+4),((t)+4)*4,r4); \ rotlwi RB(t),RB(t),30; \ - add RT(t),RT(t),r14 + add RT(t),RT(t),r6 #define STEPD0_UPDATE(t) \ and r6,RB(t),RC(t); \ @@ -123,8 +123,7 @@ _GLOBAL(powerpc_sha_transform) PPC_STLU r1,-INT_FRAME_SIZE(r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_NVGPRS(r1) /* Load up A - E */ lwz RA(0),0(r3) /* A */ @@ -182,7 +181,6 @@ _GLOBAL(powerpc_sha_transform) stw RD(0),12(r3) stw RE(0),16(r3) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_NVGPRS(r1) addi r1,r1,INT_FRAME_SIZE blr diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 359c44341761..ed696de5888b 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -10,6 +10,16 @@ #include <asm/ppc-opcode.h> #include <asm/firmware.h> +#ifdef __powerpc64__ +#ifdef CONFIG_PPC_BOOK3S +#define FIRST_NVGPR 15 +#else +#define FIRST_NVGPR 14 +#endif +#else +#define 
FIRST_NVGPR 13 +#endif + #ifdef __ASSEMBLY__ #define SZL (BITS_PER_LONG/8) @@ -75,16 +85,21 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #ifdef __powerpc64__ #define SAVE_GPR(n, base) std n,GPR0+8*(n)(base) #define REST_GPR(n, base) ld n,GPR0+8*(n)(base) +#ifdef CONFIG_PPC_BOOK3S +#define SAVE_NVGPRS(base) SAVE_GPR(15, base); SAVE_2GPRS(16, base); SAVE_4GPRS(18, base); SAVE_10GPRS(22, base) +#define REST_NVGPRS(base) REST_GPR(15, base); REST_2GPRS(16, base); REST_4GPRS(18, base); REST_10GPRS(22, base) +#else /* CONFIG_PPC_BOOK3S */ #define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) #define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base) -#else +#endif /* CONFIG_PPC_BOOK3S */ +#else /* __powerpc64__ */ #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) #define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); \ SAVE_10GPRS(22, base) #define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); \ REST_10GPRS(22, base) -#endif +#endif /* __powerpc64__ */ #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) #define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) @@ -184,7 +199,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #ifdef CONFIG_PPC64 #define STACKFRAMESIZE 256 -#define __STK_REG(i) (112 + ((i)-14)*8) +#define __STK_REG(i) (112 + ((i)-15)*8) #define STK_REG(i) __STK_REG(__REG_##i) #ifdef PPC64_ELF_ABI_v2 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 709e23425317..49e849990f9f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -283,9 +283,9 @@ int main(void) STACK_PT_REGS_OFFSET(GPR11, gpr[11]); STACK_PT_REGS_OFFSET(GPR12, gpr[12]); STACK_PT_REGS_OFFSET(GPR13, gpr[13]); -#ifndef CONFIG_PPC64 +#ifndef CONFIG_PPC_BOOK3E_64 STACK_PT_REGS_OFFSET(GPR14, gpr[14]); -#endif /* CONFIG_PPC64 */ +#endif /* * Note: these symbols include _ because they overlap with 
special * register names diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8587059ad848..9ffea7c7764f 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -451,8 +451,8 @@ ret_from_fork: ret_from_kernel_thread: REST_NVGPRS(r1) bl schedule_tail - mtlr r14 - mr r3,r15 + mtlr FIRST_NVGPR + mr r3,FIRST_NVGPR+1 PPC440EP_ERR42 blrl li r3,0 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b8b6069309da..8db0f378e8b0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -38,6 +38,7 @@ #include <asm/tm.h> #include <asm/ppc-opcode.h> #include <asm/export.h> +#include <asm/exception-64s.h> /* * System calls. @@ -405,7 +406,7 @@ _GLOBAL(save_nvgprs) * The sigsuspend and rt_sigsuspend system calls can call do_signal * and thus put the process into the stopped state where we might * want to examine its user state with ptrace. Therefore we need - * to save all the nonvolatile registers (r14 - r31) before calling + * to save all the nonvolatile registers (r15 - r31) before calling * the C code. Similarly, fork, vfork and clone need the full * register state on the stack so that it can be copied to the child. 
*/ @@ -449,10 +450,10 @@ _GLOBAL(ret_from_fork) _GLOBAL(ret_from_kernel_thread) bl schedule_tail REST_NVGPRS(r1) - mtlr r14 - mr r3,r15 + mtlr FIRST_NVGPR + mr r3,FIRST_NVGPR+1 #ifdef PPC64_ELF_ABI_v2 - mr r12,r14 + mr r12,FIRST_NVGPR #endif blrl li r3,0 @@ -481,9 +482,7 @@ _GLOBAL(_switch) mflr r0 std r0,16(r1) stdu r1,-SWITCH_FRAME_SIZE(r1) - /* r3-r13 are caller saved -- Cort */ - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_NVGPRS(r1) std r0,_NIP(r1) /* Return to switch caller */ mfcr r23 std r23,_CCR(r1) @@ -590,9 +589,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) ld r6,_CCR(r1) mtcrf 0xFF,r6 - /* r3-r13 are destroyed -- Cort */ - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + /* Volatile regs are destroyed */ + REST_NVGPRS(r1) /* convert old thread to its task_struct for return value */ addi r3,r3,-THREAD @@ -980,12 +978,14 @@ _GLOBAL(enter_rtas) /* Because RTAS is running in 32b mode, it clobbers the high order half * of all registers that it saves. We therefore save those registers - * RTAS might touch to the stack. (r0, r3-r13 are caller saved) + * RTAS might touch to the stack. (r0, r3-r12 are caller saved) */ SAVE_GPR(2, r1) /* Save the TOC */ SAVE_GPR(13, r1) /* Save paca */ - SAVE_8GPRS(14, r1) /* Save the non-volatiles */ - SAVE_10GPRS(22, r1) /* ditto */ +#ifdef CONFIG_PPC_BOOK3S + SAVE_GPR(14, r1) /* Save r14 */ +#endif + SAVE_NVGPRS(r1) /* Save the non-volatiles */ mfcr r4 std r4,_CCR(r1) @@ -1083,8 +1083,10 @@ rtas_restore_regs: /* relocation is on at this point */ REST_GPR(2, r1) /* Restore the TOC */ REST_GPR(13, r1) /* Restore paca */ - REST_8GPRS(14, r1) /* Restore the non-volatiles */ - REST_10GPRS(22, r1) /* ditto */ +#ifdef CONFIG_PPC_BOOK3S + REST_GPR(14, r1) /* Restore r14 */ +#endif + REST_NVGPRS(r1) /* Restore the non-volatiles */ GET_PACA(r13) @@ -1114,12 +1116,14 @@ _GLOBAL(enter_prom) /* Because PROM is running in 32b mode, it clobbers the high order half * of all registers that it saves. 
We therefore save those registers - * PROM might touch to the stack. (r0, r3-r13 are caller saved) + * PROM might touch to the stack. (r0, r3-r14 are caller saved) */ SAVE_GPR(2, r1) SAVE_GPR(13, r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) +#ifdef CONFIG_PPC_BOOK3S + SAVE_GPR(14, r1) +#endif + SAVE_NVGPRS(r1) mfcr r10 mfmsr r11 std r10,_CCR(r1) @@ -1163,8 +1167,10 @@ _GLOBAL(enter_prom) /* Restore other registers */ REST_GPR(2, r1) REST_GPR(13, r1) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) +#ifdef CONFIG_PPC_BOOK3S + REST_GPR(14, r1) +#endif + REST_NVGPRS(r1) ld r4,_CCR(r1) mtcr r4 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cf6dd08493cb..5c1d10c09c4e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1536,8 +1536,7 @@ BEGIN_FTR_SECTION ld r10,EX_CFAR(r3) std r10,ORIG_GPR3(r1) END_FTR_SECTION_IFSET(CPU_FTR_CFAR) - SAVE_8GPRS(14,r1) - SAVE_10GPRS(22,r1) + SAVE_NVGPRS(r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) addi r11,r1,INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index e43116237944..ffe46b5558e4 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -796,9 +796,9 @@ __secondary_start: /* Initialize the kernel stack */ LOAD_REG_ADDR(r3, current_set) sldi r28,r24,3 /* get current_set[cpu#] */ - ldx r14,r3,r28 - addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD - std r14,PACAKSAVE(r13) + ldx r15,r3,r28 + addi r15,r15,THREAD_SIZE-STACK_FRAME_OVERHEAD + std r15,PACAKSAVE(r13) /* Do early setup for that CPU (SLB and hash table pointer) */ bl early_setup_secondary @@ -807,7 +807,7 @@ __secondary_start: * setup the new stack pointer, but *don't* use this until * translation is on. 
*/ - mr r1, r14 + mr r1, r15 /* Clear backchain so we get nice backtraces */ li r7,0 diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 07d4e0ad60db..8c84ab501236 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -87,19 +87,19 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) /* * Used by threads when the lock bit of core_idle_state is set. * Threads will spin in HMT_LOW until the lock bit is cleared. - * r14 - pointer to core_idle_state - * r15 - used to load contents of core_idle_state + * r15 - pointer to core_idle_state + * r16 - used to load contents of core_idle_state * r9 - used as a temporary variable */ core_idle_lock_held: HMT_LOW -3: lwz r15,0(r14) - andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h +3: lwz r16,0(r15) + andis. r16,r16,PNV_CORE_IDLE_LOCK_BIT@h bne 3b HMT_MEDIUM - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + lwarx r16,0,r15 + andis. r9,r16,PNV_CORE_IDLE_LOCK_BIT@h bne- core_idle_lock_held blr @@ -209,21 +209,21 @@ pnv_enter_arch207_idle_mode: 2: /* Sleep or winkle */ lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + ld r15,PACA_CORE_IDLE_STATE_PTR(r13) li r5,0 beq cr3,3f lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h 3: lwarx_loop1: - lwarx r15,0,r14 + lwarx r16,0,r15 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + andis. r9,r16,PNV_CORE_IDLE_LOCK_BIT@h bnel- core_idle_lock_held - add r15,r15,r5 /* Add if winkle */ - andc r15,r15,r7 /* Clear thread bit */ + add r16,r16,r5 /* Add if winkle */ + andc r16,r16,r7 /* Clear thread bit */ - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS + andi. r9,r16,PNV_CORE_IDLE_THREAD_BITS /* * If cr0 = 0, then current thread is the last thread of the core entering @@ -237,7 +237,7 @@ lwarx_loop1: pnv_fastsleep_workaround_at_entry: beq fastsleep_workaround_at_entry - stwcx. r15,0,r14 + stwcx. 
r16,0,r15 bne- lwarx_loop1 isync @@ -246,8 +246,8 @@ common_enter: /* common code for all the threads entering sleep or winkle */ IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) fastsleep_workaround_at_entry: - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - stwcx. r15,0,r14 + oris r16,r16,PNV_CORE_IDLE_LOCK_BIT@h + stwcx. r16,0,r15 bne- lwarx_loop1 isync @@ -257,9 +257,9 @@ fastsleep_workaround_at_entry: bl opal_config_cpu_idle_state /* Unlock */ - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h + xoris r16,r16,PNV_CORE_IDLE_LOCK_BIT@h lwsync - stw r15,0(r14) + stw r16,0(r15) b common_enter enter_winkle: @@ -303,15 +303,15 @@ power_enter_stop: * stack and enter stop */ lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + ld r15,PACA_CORE_IDLE_STATE_PTR(r13) lwarx_loop_stop: - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + lwarx r16,0,r15 + andis. r9,r16,PNV_CORE_IDLE_LOCK_BIT@h bnel- core_idle_lock_held - andc r15,r15,r7 /* Clear thread bit */ + andc r16,r16,r7 /* Clear thread bit */ - stwcx. r15,0,r14 + stwcx. r16,0,r15 bne- lwarx_loop_stop isync @@ -567,14 +567,14 @@ pnv_wakeup_tb_loss: * is required to return back to reset vector after hypervisor state * restore is complete. */ - mr r18,r4 - mflr r17 - mfspr r16,SPRN_SRR1 + mr r19,r4 + mflr r18 + mfspr r17,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + ld r15,PACA_CORE_IDLE_STATE_PTR(r13) lbz r7,PACA_THREAD_MASK(r13) /* @@ -588,15 +588,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * In either case loop until the lock bit is cleared. */ 1: - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h + lwarx r16,0,r15 + andis. r9,r16,PNV_CORE_IDLE_LOCK_BIT@h bnel- core_idle_lock_held - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - stwcx. r15,0,r14 + oris r16,r16,PNV_CORE_IDLE_LOCK_BIT@h + stwcx. r16,0,r15 bne- 1b isync - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS + andi. 
r9,r16,PNV_CORE_IDLE_THREAD_BITS cmpwi cr2,r9,0 /* @@ -660,29 +660,29 @@ BEGIN_FTR_SECTION * } * */ - cmpwi r18,PNV_THREAD_WINKLE + cmpwi r19,PNV_THREAD_WINKLE bne 2f - andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h - subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h + andis. r9,r16,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h + subis r16,r16,PNV_CORE_IDLE_WINKLE_COUNT@h beq 2f - ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ + ori r16,r16,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ 2: /* Shift thread bit to winkle mask, then test if this thread is set, * and remove it from the winkle bits */ slwi r8,r7,8 - and r8,r8,r15 - andc r15,r15,r8 + and r8,r8,r16 + andc r16,r16,r8 cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */ lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 + and r4,r4,r16 cmpwi r4,0 /* Check if first in subcore */ - or r15,r15,r7 /* Set thread bit */ + or r16,r16,r7 /* Set thread bit */ beq first_thread_in_subcore END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - or r15,r15,r7 /* Set thread bit */ + or r16,r16,r7 /* Set thread bit */ beq cr2,first_thread_in_core /* Not first thread in core or subcore to wake up */ @@ -758,9 +758,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_WORC,r4 clear_lock: - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h + xoris r16,r16,PNV_CORE_IDLE_LOCK_BIT@h lwsync - stw r15,0(r14) + stw r16,0(r15) common_exit: /* @@ -814,8 +814,8 @@ no_segments: hypervisor_state_restored: - mtspr SPRN_SRR1,r16 - mtlr r17 + mtspr SPRN_SRR1,r17 + mtlr r18 blr /* return to pnv_powersave_wakeup */ fastsleep_workaround_at_exit: diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d645da302bf2..868835bb64c3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1478,12 +1478,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->gpr[1] = sp + sizeof(struct pt_regs); /* function */ if (usp) - childregs->gpr[14] = ppc_function_entry((void 
*)usp); + childregs->gpr[FIRST_NVGPR] = ppc_function_entry((void *)usp); #ifdef CONFIG_PPC64 clear_tsk_thread_flag(p, TIF_32BIT); childregs->softe = 1; #endif - childregs->gpr[15] = kthread_arg; + childregs->gpr[FIRST_NVGPR + 1] = kthread_arg; p->thread.regs = NULL; /* no user register state */ ti->flags |= _TIF_RESTOREALL; f = ret_from_kernel_thread; diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3a2d04134da9..cc953bddeec4 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -112,24 +112,24 @@ _GLOBAL(tm_reclaim) SAVE_NVGPRS(r1) /* We need to setup MSR for VSX register save instructions. */ - mfmsr r14 - mr r15, r14 - ori r15, r15, MSR_FP - li r16, 0 - ori r16, r16, MSR_EE /* IRQs hard off */ - andc r15, r15, r16 - oris r15, r15, MSR_VEC@h + mfmsr r15 + mr r16, r15 + ori r16, r16, MSR_FP + li r17, 0 + ori r17, r17, MSR_EE /* IRQs hard off */ + andc r16, r16, r17 + oris r16, r16, MSR_VEC@h #ifdef CONFIG_VSX BEGIN_FTR_SECTION - oris r15,r15, MSR_VSX@h + oris r16,r16, MSR_VSX@h END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - mtmsrd r15 - std r14, TM_FRAME_L0(r1) + mtmsrd r16 + std r15, TM_FRAME_L0(r1) /* Do sanity check on MSR to make sure we are suspended */ li r7, (MSR_TS_S)@higher - srdi r6, r14, 32 + srdi r6, r15, 32 and r6, r6, r7 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 @@ -291,11 +291,11 @@ dont_backup_fp: /* AMR is checkpointed too, but is unsupported by Linux. 
*/ /* Restore original MSR/IRQ state & clear TM mode */ - ld r14, TM_FRAME_L0(r1) /* Orig MSR */ + ld r15, TM_FRAME_L0(r1) /* Orig MSR */ - li r15, 0 - rldimi r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1 - mtmsrd r14 + li r16, 0 + rldimi r15, r16, MSR_TS_LG, (63-MSR_TS_LG)-1 + mtmsrd r15 REST_NVGPRS(r1) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 7c933a99f5d5..e1f7f4c6767a 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -72,7 +72,7 @@ _GLOBAL(ftrace_caller) ld r5,0(r3) #ifdef CONFIG_LIVEPATCH - mr r14,r7 /* remember old NIP */ + mr r15,r7 /* remember old NIP */ #endif /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE @@ -99,7 +99,7 @@ ftrace_call: ld r3, _NIP(r1) mtctr r3 #ifdef CONFIG_LIVEPATCH - cmpd r14,r3 /* has NIP been altered? */ + cmpd r15,r3 /* has NIP been altered? */ #endif /* Restore gprs */ diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 0fdc4a28970b..5d5a27c5c1ae 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -46,7 +46,7 @@ _GLOBAL(__kvmppc_vcore_entry) /* Save host state to the stack */ stdu r1, -SWITCH_FRAME_SIZE(r1) - /* Save non-volatile registers (r14 - r31) and CR */ + /* Save non-volatile registers (r15 - r31) and CR */ SAVE_NVGPRS(r1) mfcr r3 std r3, _CCR(r1) @@ -145,9 +145,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * R2 = host R2 * R12 = exit handler id * R13 = PACA + * R14 = ? 
XXX */ - /* Restore non-volatile host registers (r14 - r31) and CR */ + /* Restore non-volatile host registers (r15 - r31) and CR */ REST_NVGPRS(r1) ld r4, _CCR(r1) mtcr r4 diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 47e06147c92c..4e1c4e560a3b 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -65,9 +65,9 @@ _GLOBAL(__csum_partial) mtctr r6 stdu r1,-STACKFRAMESIZE(r1) - std r14,STK_REG(R14)(r1) std r15,STK_REG(R15)(r1) std r16,STK_REG(R16)(r1) + std r17,STK_REG(R17)(r1) ld r6,0(r3) ld r9,8(r3) @@ -85,11 +85,11 @@ _GLOBAL(__csum_partial) 2: adde r0,r0,r6 ld r12,32(r3) - ld r14,40(r3) + ld r15,40(r3) adde r0,r0,r9 - ld r15,48(r3) - ld r16,56(r3) + ld r16,48(r3) + ld r17,56(r3) addi r3,r3,64 adde r0,r0,r10 @@ -98,13 +98,13 @@ _GLOBAL(__csum_partial) adde r0,r0,r12 - adde r0,r0,r14 - adde r0,r0,r15 + + adde r0,r0,r16 ld r6,0(r3) ld r9,8(r3) - adde r0,r0,r16 + adde r0,r0,r17 ld r10,16(r3) ld r11,24(r3) bdnz 2b @@ -112,23 +112,23 @@ _GLOBAL(__csum_partial) adde r0,r0,r6 ld r12,32(r3) - ld r14,40(r3) + ld r15,40(r3) adde r0,r0,r9 - ld r15,48(r3) - ld r16,56(r3) + ld r16,48(r3) + ld r17,56(r3) addi r3,r3,64 adde r0,r0,r10 adde r0,r0,r11 adde r0,r0,r12 - adde r0,r0,r14 adde r0,r0,r15 adde r0,r0,r16 + adde r0,r0,r17 - ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) addi r1,r1,STACKFRAMESIZE andi. 
r4,r4,63 @@ -259,9 +259,9 @@ dstnr; sth r6,0(r4) mtctr r6 stdu r1,-STACKFRAMESIZE(r1) - std r14,STK_REG(R14)(r1) std r15,STK_REG(R15)(r1) std r16,STK_REG(R16)(r1) + std r17,STK_REG(R17)(r1) source; ld r6,0(r3) source; ld r9,8(r3) @@ -279,11 +279,11 @@ source; ld r11,24(r3) 2: adde r0,r0,r6 source; ld r12,32(r3) -source; ld r14,40(r3) +source; ld r15,40(r3) adde r0,r0,r9 -source; ld r15,48(r3) -source; ld r16,56(r3) +source; ld r16,48(r3) +source; ld r17,56(r3) addi r3,r3,64 adde r0,r0,r10 @@ -296,18 +296,18 @@ dest; std r11,24(r4) adde r0,r0,r12 dest; std r12,32(r4) -dest; std r14,40(r4) +dest; std r15,40(r4) - adde r0,r0,r14 -dest; std r15,48(r4) -dest; std r16,56(r4) + adde r0,r0,r15 +dest; std r16,48(r4) +dest; std r17,56(r4) addi r4,r4,64 - adde r0,r0,r15 + adde r0,r0,r16 source; ld r6,0(r3) source; ld r9,8(r3) - adde r0,r0,r16 + adde r0,r0,r17 source; ld r10,16(r3) source; ld r11,24(r3) bdnz 2b @@ -315,11 +315,11 @@ source; ld r11,24(r3) adde r0,r0,r6 source; ld r12,32(r3) -source; ld r14,40(r3) +source; ld r15,40(r3) adde r0,r0,r9 -source; ld r15,48(r3) -source; ld r16,56(r3) +source; ld r16,48(r3) +source; ld r17,56(r3) addi r3,r3,64 adde r0,r0,r10 @@ -332,19 +332,19 @@ dest; std r11,24(r4) adde r0,r0,r12 dest; std r12,32(r4) -dest; std r14,40(r4) +dest; std r15,40(r4) - adde r0,r0,r14 -dest; std r15,48(r4) -dest; std r16,56(r4) + adde r0,r0,r15 +dest; std r16,48(r4) +dest; std r17,56(r4) addi r4,r4,64 - adde r0,r0,r15 adde r0,r0,r16 + adde r0,r0,r17 - ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) addi r1,r1,STACKFRAMESIZE andi. 
r5,r5,63 @@ -407,9 +407,9 @@ dstnr; stb r6,0(r4) blr .Lsrc_error: - ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) addi r1,r1,STACKFRAMESIZE .Lsrc_error_nr: cmpdi 0,r7,0 @@ -419,9 +419,9 @@ dstnr; stb r6,0(r4) blr .Ldest_error: - ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) addi r1,r1,STACKFRAMESIZE .Ldest_error_nr: cmpdi 0,r8,0 diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index c517c27fe43c..8e65d4ea0ee4 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -114,13 +114,13 @@ _GLOBAL(copypage_power7) #endif .Lnonvmx_copy: - std r14,STK_REG(R14)(r1) std r15,STK_REG(R15)(r1) std r16,STK_REG(R16)(r1) std r17,STK_REG(R17)(r1) std r18,STK_REG(R18)(r1) std r19,STK_REG(R19)(r1) std r20,STK_REG(R20)(r1) + std r21,STK_REG(R21)(r1) 1: ld r0,0(r4) ld r5,8(r4) @@ -131,13 +131,13 @@ _GLOBAL(copypage_power7) ld r10,48(r4) ld r11,56(r4) ld r12,64(r4) - ld r14,72(r4) - ld r15,80(r4) - ld r16,88(r4) - ld r17,96(r4) - ld r18,104(r4) - ld r19,112(r4) - ld r20,120(r4) + ld r15,72(r4) + ld r16,80(r4) + ld r17,88(r4) + ld r18,96(r4) + ld r19,104(r4) + ld r20,112(r4) + ld r21,120(r4) addi r4,r4,128 std r0,0(r3) std r5,8(r3) @@ -148,22 +148,22 @@ _GLOBAL(copypage_power7) std r10,48(r3) std r11,56(r3) std r12,64(r3) - std r14,72(r3) - std r15,80(r3) - std r16,88(r3) - std r17,96(r3) - std r18,104(r3) - std r19,112(r3) - std r20,120(r3) + std r15,72(r3) + std r16,80(r3) + std r17,88(r3) + std r18,96(r3) + std r19,104(r3) + std r20,112(r3) + std r21,120(r3) addi r3,r3,128 bdnz 1b - ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) ld r17,STK_REG(R17)(r1) ld r18,STK_REG(R18)(r1) ld r19,STK_REG(R19)(r1) ld r20,STK_REG(R20)(r1) + ld r21,STK_REG(R21)(r1) addi r1,r1,STACKFRAMESIZE blr diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 
5d6ccd75b433..8f17ad74da16 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -51,9 +51,9 @@ .Ldo_err4: - ld r16,STK_REG(R16)(r1) - ld r15,STK_REG(R15)(r1) - ld r14,STK_REG(R14)(r1) + ld r17,STK_REG(R16)(r1) + ld r16,STK_REG(R15)(r1) + ld r15,STK_REG(R14)(r1) .Ldo_err3: bl exit_vmx_usercopy ld r0,STACKFRAMESIZE+16(r1) @@ -62,15 +62,15 @@ #endif /* CONFIG_ALTIVEC */ .Ldo_err2: - ld r22,STK_REG(R22)(r1) - ld r21,STK_REG(R21)(r1) - ld r20,STK_REG(R20)(r1) - ld r19,STK_REG(R19)(r1) - ld r18,STK_REG(R18)(r1) - ld r17,STK_REG(R17)(r1) - ld r16,STK_REG(R16)(r1) - ld r15,STK_REG(R15)(r1) - ld r14,STK_REG(R14)(r1) + ld r23,STK_REG(R22)(r1) + ld r22,STK_REG(R21)(r1) + ld r21,STK_REG(R20)(r1) + ld r20,STK_REG(R19)(r1) + ld r19,STK_REG(R18)(r1) + ld r18,STK_REG(R17)(r1) + ld r17,STK_REG(R16)(r1) + ld r16,STK_REG(R15)(r1) + ld r15,STK_REG(R14)(r1) .Lexit: addi r1,r1,STACKFRAMESIZE .Ldo_err1: @@ -131,15 +131,15 @@ err1; stw r0,0(r3) mflr r0 stdu r1,-STACKFRAMESIZE(r1) - std r14,STK_REG(R14)(r1) - std r15,STK_REG(R15)(r1) - std r16,STK_REG(R16)(r1) - std r17,STK_REG(R17)(r1) - std r18,STK_REG(R18)(r1) - std r19,STK_REG(R19)(r1) - std r20,STK_REG(R20)(r1) - std r21,STK_REG(R21)(r1) - std r22,STK_REG(R22)(r1) + std r15,STK_REG(R14)(r1) + std r16,STK_REG(R15)(r1) + std r17,STK_REG(R16)(r1) + std r18,STK_REG(R17)(r1) + std r19,STK_REG(R18)(r1) + std r20,STK_REG(R19)(r1) + std r21,STK_REG(R20)(r1) + std r22,STK_REG(R21)(r1) + std r23,STK_REG(R22)(r1) std r0,STACKFRAMESIZE+16(r1) srdi r6,r5,7 @@ -156,14 +156,14 @@ err2; ld r9,32(r4) err2; ld r10,40(r4) err2; ld r11,48(r4) err2; ld r12,56(r4) -err2; ld r14,64(r4) -err2; ld r15,72(r4) -err2; ld r16,80(r4) -err2; ld r17,88(r4) -err2; ld r18,96(r4) -err2; ld r19,104(r4) -err2; ld r20,112(r4) -err2; ld r21,120(r4) +err2; ld r15,64(r4) +err2; ld r16,72(r4) +err2; ld r17,80(r4) +err2; ld r18,88(r4) +err2; ld r19,96(r4) +err2; ld r20,104(r4) +err2; ld r21,112(r4) +err2; ld r22,120(r4) addi r4,r4,128 
err2; std r0,0(r3) err2; std r6,8(r3) @@ -173,28 +173,28 @@ err2; std r9,32(r3) err2; std r10,40(r3) err2; std r11,48(r3) err2; std r12,56(r3) -err2; std r14,64(r3) -err2; std r15,72(r3) -err2; std r16,80(r3) -err2; std r17,88(r3) -err2; std r18,96(r3) -err2; std r19,104(r3) -err2; std r20,112(r3) -err2; std r21,120(r3) +err2; std r15,64(r3) +err2; std r16,72(r3) +err2; std r17,80(r3) +err2; std r18,88(r3) +err2; std r19,96(r3) +err2; std r20,104(r3) +err2; std r21,112(r3) +err2; std r22,120(r3) addi r3,r3,128 bdnz 4b clrldi r5,r5,(64-7) - ld r14,STK_REG(R14)(r1) - ld r15,STK_REG(R15)(r1) - ld r16,STK_REG(R16)(r1) - ld r17,STK_REG(R17)(r1) - ld r18,STK_REG(R18)(r1) - ld r19,STK_REG(R19)(r1) - ld r20,STK_REG(R20)(r1) - ld r21,STK_REG(R21)(r1) - ld r22,STK_REG(R22)(r1) + ld r15,STK_REG(R14)(r1) + ld r16,STK_REG(R15)(r1) + ld r17,STK_REG(R16)(r1) + ld r18,STK_REG(R17)(r1) + ld r19,STK_REG(R18)(r1) + ld r20,STK_REG(R19)(r1) + ld r21,STK_REG(R20)(r1) + ld r22,STK_REG(R21)(r1) + ld r23,STK_REG(R22)(r1) addi r1,r1,STACKFRAMESIZE /* Up to 127B to go */ @@ -405,14 +405,14 @@ err3; stvx v0,r3,r11 7: sub r5,r5,r6 srdi r6,r5,7 - std r14,STK_REG(R14)(r1) - std r15,STK_REG(R15)(r1) - std r16,STK_REG(R16)(r1) + std r15,STK_REG(R14)(r1) + std r16,STK_REG(R15)(r1) + std r17,STK_REG(R16)(r1) li r12,64 - li r14,80 - li r15,96 - li r16,112 + li r15,80 + li r16,96 + li r17,112 mtctr r6 @@ -427,24 +427,24 @@ err4; lvx v6,r4,r9 err4; lvx v5,r4,r10 err4; lvx v4,r4,r11 err4; lvx v3,r4,r12 -err4; lvx v2,r4,r14 -err4; lvx v1,r4,r15 -err4; lvx v0,r4,r16 +err4; lvx v2,r4,r15 +err4; lvx v1,r4,r16 +err4; lvx v0,r4,r17 addi r4,r4,128 err4; stvx v7,r0,r3 err4; stvx v6,r3,r9 err4; stvx v5,r3,r10 err4; stvx v4,r3,r11 err4; stvx v3,r3,r12 -err4; stvx v2,r3,r14 -err4; stvx v1,r3,r15 -err4; stvx v0,r3,r16 +err4; stvx v2,r3,r15 +err4; stvx v1,r3,r16 +err4; stvx v0,r3,r17 addi r3,r3,128 bdnz 8b - ld r14,STK_REG(R14)(r1) - ld r15,STK_REG(R15)(r1) - ld r16,STK_REG(R16)(r1) + ld r15,STK_REG(R14)(r1) + ld 
r16,STK_REG(R15)(r1) + ld r17,STK_REG(R16)(r1) /* Up to 127B to go */ clrldi r5,r5,(64-7) @@ -590,14 +590,14 @@ err3; stvx v11,r3,r11 7: sub r5,r5,r6 srdi r6,r5,7 - std r14,STK_REG(R14)(r1) - std r15,STK_REG(R15)(r1) - std r16,STK_REG(R16)(r1) + std r15,STK_REG(R14)(r1) + std r16,STK_REG(R15)(r1) + std r17,STK_REG(R16)(r1) li r12,64 - li r14,80 - li r15,96 - li r16,112 + li r15,80 + li r16,96 + li r17,112 mtctr r6 @@ -617,11 +617,11 @@ err4; lvx v4,r4,r11 VPERM(v11,v5,v4,v16) err4; lvx v3,r4,r12 VPERM(v12,v4,v3,v16) -err4; lvx v2,r4,r14 +err4; lvx v2,r4,r15 VPERM(v13,v3,v2,v16) -err4; lvx v1,r4,r15 +err4; lvx v1,r4,r16 VPERM(v14,v2,v1,v16) -err4; lvx v0,r4,r16 +err4; lvx v0,r4,r17 VPERM(v15,v1,v0,v16) addi r4,r4,128 err4; stvx v8,r0,r3 @@ -629,15 +629,15 @@ err4; stvx v9,r3,r9 err4; stvx v10,r3,r10 err4; stvx v11,r3,r11 err4; stvx v12,r3,r12 -err4; stvx v13,r3,r14 -err4; stvx v14,r3,r15 -err4; stvx v15,r3,r16 +err4; stvx v13,r3,r15 +err4; stvx v14,r3,r16 +err4; stvx v15,r3,r17 addi r3,r3,128 bdnz 8b - ld r14,STK_REG(R14)(r1) - ld r15,STK_REG(R15)(r1) - ld r16,STK_REG(R16)(r1) + ld r15,STK_REG(R14)(r1) + ld r16,STK_REG(R15)(r1) + ld r17,STK_REG(R16)(r1) /* Up to 127B to go */ clrldi r5,r5,(64-7) diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index 7e5e1c28e56a..c46ad2f0a718 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -314,9 +314,12 @@ _GLOBAL(_restvr_31) #else /* CONFIG_PPC64 */ +/* 64-bit has -ffixed-r13, Book3S also has -ffixed-r14 */ +#ifdef CONFIG_PPC_BOOK3E .globl _savegpr0_14 _savegpr0_14: std r14,-144(r1) +#endif .globl _savegpr0_15 _savegpr0_15: std r15,-136(r1) diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 95ca426637eb..6c0684e5e0d3 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -76,7 +76,6 @@ _GLOBAL(memcpy_power7) mflr r0 stdu r1,-STACKFRAMESIZE(r1) - std r14,STK_REG(R14)(r1) std r15,STK_REG(R15)(r1) std 
r16,STK_REG(R16)(r1) std r17,STK_REG(R17)(r1) @@ -85,6 +84,7 @@ _GLOBAL(memcpy_power7) std r20,STK_REG(R20)(r1) std r21,STK_REG(R21)(r1) std r22,STK_REG(R22)(r1) + std r23,STK_REG(R23)(r1) std r0,STACKFRAMESIZE+16(r1) srdi r6,r5,7 @@ -101,14 +101,14 @@ _GLOBAL(memcpy_power7) ld r10,40(r4) ld r11,48(r4) ld r12,56(r4) - ld r14,64(r4) - ld r15,72(r4) - ld r16,80(r4) - ld r17,88(r4) - ld r18,96(r4) - ld r19,104(r4) - ld r20,112(r4) - ld r21,120(r4) + ld r15,64(r4) + ld r16,72(r4) + ld r17,80(r4) + ld r18,88(r4) + ld r19,96(r4) + ld r20,104(r4) + ld r21,112(r4) + ld r22,120(r4) addi r4,r4,128 std r0,0(r3) std r6,8(r3) @@ -118,20 +118,19 @@ _GLOBAL(memcpy_power7) std r10,40(r3) std r11,48(r3) std r12,56(r3) - std r14,64(r3) - std r15,72(r3) - std r16,80(r3) - std r17,88(r3) - std r18,96(r3) - std r19,104(r3) - std r20,112(r3) - std r21,120(r3) + std r15,64(r3) + std r16,72(r3) + std r17,80(r3) + std r18,88(r3) + std r19,96(r3) + std r20,104(r3) + std r21,112(r3) + std r22,120(r3) addi r3,r3,128 bdnz 4b clrldi r5,r5,(64-7) - ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) ld r17,STK_REG(R17)(r1) @@ -140,6 +139,7 @@ _GLOBAL(memcpy_power7) ld r20,STK_REG(R20)(r1) ld r21,STK_REG(R21)(r1) ld r22,STK_REG(R22)(r1) + ld r23,STK_REG(R23)(r1) addi r1,r1,STACKFRAMESIZE /* Up to 127B to go */ @@ -350,14 +350,14 @@ _GLOBAL(memcpy_power7) 7: sub r5,r5,r6 srdi r6,r5,7 - std r14,STK_REG(R14)(r1) std r15,STK_REG(R15)(r1) std r16,STK_REG(R16)(r1) + std r17,STK_REG(R17)(r1) li r12,64 - li r14,80 - li r15,96 - li r16,112 + li r15,80 + li r16,96 + li r17,112 mtctr r6 @@ -372,24 +372,24 @@ _GLOBAL(memcpy_power7) lvx v5,r4,r10 lvx v4,r4,r11 lvx v3,r4,r12 - lvx v2,r4,r14 - lvx v1,r4,r15 - lvx v0,r4,r16 + lvx v2,r4,r15 + lvx v1,r4,r16 + lvx v0,r4,r17 addi r4,r4,128 stvx v7,r0,r3 stvx v6,r3,r9 stvx v5,r3,r10 stvx v4,r3,r11 stvx v3,r3,r12 - stvx v2,r3,r14 - stvx v1,r3,r15 - stvx v0,r3,r16 + stvx v2,r3,r15 + stvx v1,r3,r16 + stvx v0,r3,r17 addi r3,r3,128 bdnz 8b - ld 
r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) /* Up to 127B to go */ clrldi r5,r5,(64-7) @@ -536,14 +536,14 @@ _GLOBAL(memcpy_power7) 7: sub r5,r5,r6 srdi r6,r5,7 - std r14,STK_REG(R14)(r1) std r15,STK_REG(R15)(r1) std r16,STK_REG(R16)(r1) + std r17,STK_REG(R17)(r1) li r12,64 - li r14,80 - li r15,96 - li r16,112 + li r15,80 + li r16,96 + li r17,112 mtctr r6 @@ -563,11 +563,11 @@ _GLOBAL(memcpy_power7) VPERM(v11,v5,v4,v16) lvx v3,r4,r12 VPERM(v12,v4,v3,v16) - lvx v2,r4,r14 + lvx v2,r4,r15 VPERM(v13,v3,v2,v16) - lvx v1,r4,r15 + lvx v1,r4,r16 VPERM(v14,v2,v1,v16) - lvx v0,r4,r16 + lvx v0,r4,r17 VPERM(v15,v1,v0,v16) addi r4,r4,128 stvx v8,r0,r3 @@ -575,15 +575,15 @@ _GLOBAL(memcpy_power7) stvx v10,r3,r10 stvx v11,r3,r11 stvx v12,r3,r12 - stvx v13,r3,r14 - stvx v14,r3,r15 - stvx v15,r3,r16 + stvx v13,r3,r15 + stvx v14,r3,r16 + stvx v15,r3,r17 addi r3,r3,128 bdnz 8b - ld r14,STK_REG(R14)(r1) ld r15,STK_REG(R15)(r1) ld r16,STK_REG(R16)(r1) + ld r17,STK_REG(R17)(r1) /* Up to 127B to go */ clrldi r5,r5,(64-7) diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index a8cd7e289ecd..52a30db033c1 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -44,9 +44,11 @@ * A register r4 * X register r5 * addr param r6 - * r7-r10 scratch - * skb->data r14 - * skb headlen r15 (skb->len - skb->data_len) + * scratch r7-r8 + * skb headlen r9 (skb->len - skb->data_len) + * skb->data r10 + * fixed regs r13-r14 + * unused r15 * m[0] r16 * m[...] ... 
* m[15] r31 @@ -58,8 +60,8 @@ #define r_addr 6 #define r_scratch1 7 #define r_scratch2 8 -#define r_D 14 -#define r_HL 15 +#define r_HL 9 +#define r_D 10 #define r_M 16 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S index 3dd9c43d40c9..5b06152052f6 100644 --- a/arch/powerpc/net/bpf_jit_asm.S +++ b/arch/powerpc/net/bpf_jit_asm.S @@ -19,8 +19,8 @@ * r3 skb * r4,r5 A,X * r6 *** address parameter to helper *** - * r7-r10 scratch - * r14 skb->data + * r7-r9 scratch + * r10 skb->data * r15 skb headlen * r16-31 M[] */ -- 2.11.0 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC][PATCH] powerpc/64s: stop using r14 register 2017-05-21 14:00 [RFC][PATCH] powerpc/64s: stop using r14 register Nicholas Piggin @ 2017-05-21 22:09 ` Benjamin Herrenschmidt 2017-05-22 0:29 ` Nicholas Piggin 2017-05-30 19:08 ` Naveen N. Rao 1 sibling, 1 reply; 7+ messages in thread From: Benjamin Herrenschmidt @ 2017-05-21 22:09 UTC (permalink / raw) To: Nicholas Piggin, linuxppc-dev; +Cc: Anton Blanchard On Mon, 2017-05-22 at 00:00 +1000, Nicholas Piggin wrote: > I'd like to take over the r14 register for use as a per-cpu kernel > register similar to the way r13 is used for the paca. Why not use r13 instead? We don't need to access the PACA that often from C code, I thought we could flip them... > r14 being the last non-volatile register gcc allocates, appears with > about 0.5% the frequency as r31 in (static) instructions. I haven't > counted dynamically how many extra spills and fills that removing it > causes, but I should. My guess is the memory ops saved by using > it as a per-cpu variable will significantly outweigh the cost of > losing it as a general use register. > > This part of the patch is pretty mechanical. A couple of places (prom) > still have to use it, and I haven't quite understood the KVM code yet. > > Question is whether this approach seems okay, and whether we should do > the same for 64e. 
> > Thanks, > Nick > > --- > arch/powerpc/Makefile | 1 + > arch/powerpc/crypto/md5-asm.S | 40 +++---- > arch/powerpc/crypto/sha1-powerpc-asm.S | 10 +- > arch/powerpc/include/asm/ppc_asm.h | 21 +++- > arch/powerpc/kernel/asm-offsets.c | 4 +- > arch/powerpc/kernel/entry_32.S | 4 +- > arch/powerpc/kernel/entry_64.S | 46 ++++---- > arch/powerpc/kernel/exceptions-64s.S | 3 +- > arch/powerpc/kernel/head_64.S | 8 +- > arch/powerpc/kernel/idle_book3s.S | 88 +++++++------- > arch/powerpc/kernel/process.c | 4 +- > arch/powerpc/kernel/tm.S | 30 ++--- > arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 4 +- > arch/powerpc/kvm/book3s_hv_interrupts.S | 5 +- > arch/powerpc/lib/checksum_64.S | 66 +++++------ > arch/powerpc/lib/copypage_power7.S | 32 +++--- > arch/powerpc/lib/copyuser_power7.S | 152 ++++++++++++------------- > arch/powerpc/lib/crtsavres.S | 3 + > arch/powerpc/lib/memcpy_power7.S | 80 ++++++------- > arch/powerpc/net/bpf_jit32.h | 12 +- > arch/powerpc/net/bpf_jit_asm.S | 4 +- > 21 files changed, 321 insertions(+), 296 deletions(-) > > diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile > index bc4791aecd03..4c3492851fab 100644 > --- a/arch/powerpc/Makefile > +++ b/arch/powerpc/Makefile > @@ -137,6 +137,7 @@ endif > > CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) > CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) > +CFLAGS-$(CONFIG_PPC64) += -ffixed-r13 -ffixed-r14 > CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) > > ifeq ($(CONFIG_PPC_BOOK3S_64),y) > diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S > index 10cdf5bceebb..99e41af88e19 100644 > --- a/arch/powerpc/crypto/md5-asm.S > +++ b/arch/powerpc/crypto/md5-asm.S > @@ -25,31 +25,31 @@ > #define rW02 r10 > #define rW03 r11 > #define rW04 r12 > -#define rW05 r14 > -#define rW06 r15 > -#define rW07 r16 > -#define rW08 r17 > -#define rW09 r18 > -#define rW10 r19 > -#define rW11 r20 > -#define rW12 r21 > 
-#define rW13 r22 > -#define rW14 r23 > -#define rW15 r24 > - > -#define rT0 r25 > -#define rT1 r26 > +#define rW05 r15 > +#define rW06 r16 > +#define rW07 r17 > +#define rW08 r18 > +#define rW09 r19 > +#define rW10 r20 > +#define rW11 r21 > +#define rW12 r22 > +#define rW13 r23 > +#define rW14 r24 > +#define rW15 r25 > + > +#define rT0 r26 > +#define rT1 r27 > > #define INITIALIZE \ > PPC_STLU r1,-INT_FRAME_SIZE(r1); \ > - SAVE_8GPRS(14, r1); /* push registers onto stack */ \ > - SAVE_4GPRS(22, r1); \ > - SAVE_GPR(26, r1) > + SAVE_8GPRS(15, r1); /* push registers onto stack */ \ > + SAVE_4GPRS(23, r1); \ > + SAVE_GPR(27, r1) > > #define FINALIZE \ > - REST_8GPRS(14, r1); /* pop registers from stack */ \ > - REST_4GPRS(22, r1); \ > - REST_GPR(26, r1); \ > + REST_8GPRS(15, r1); /* pop registers from stack */ \ > + REST_4GPRS(23, r1); \ > + REST_GPR(27, r1); \ > addi r1,r1,INT_FRAME_SIZE; > > #ifdef __BIG_ENDIAN__ > diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S > index 82ddc9bdfeb1..56bc6ac942c6 100644 > --- a/arch/powerpc/crypto/sha1-powerpc-asm.S > +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S > @@ -41,10 +41,10 @@ > or r6,r6,r0; \ > add r0,RE(t),r15; \ > add RT(t),RT(t),r6; \ > - add r14,r0,W(t); \ > + add r6,r0,W(t); \ > LWZ(W((t)+4),((t)+4)*4,r4); \ > rotlwi RB(t),RB(t),30; \ > - add RT(t),RT(t),r14 > + add RT(t),RT(t),r6 > > #define STEPD0_UPDATE(t) \ > and r6,RB(t),RC(t); \ > @@ -123,8 +123,7 @@ > > _GLOBAL(powerpc_sha_transform) > PPC_STLU r1,-INT_FRAME_SIZE(r1) > - SAVE_8GPRS(14, r1) > - SAVE_10GPRS(22, r1) > + SAVE_NVGPRS(r1) > > /* Load up A - E */ > lwz RA(0),0(r3) /* A */ > @@ -182,7 +181,6 @@ _GLOBAL(powerpc_sha_transform) > stw RD(0),12(r3) > stw RE(0),16(r3) > > - REST_8GPRS(14, r1) > - REST_10GPRS(22, r1) > + REST_NVGPRS(r1) > addi r1,r1,INT_FRAME_SIZE > blr > diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h > index 359c44341761..ed696de5888b 100644 > --- 
a/arch/powerpc/include/asm/ppc_asm.h > +++ b/arch/powerpc/include/asm/ppc_asm.h > @@ -10,6 +10,16 @@ > #include <asm/ppc-opcode.h> > #include <asm/firmware.h> > > +#ifdef __powerpc64__ > +#ifdef CONFIG_PPC_BOOK3S > +#define FIRST_NVGPR 15 > +#else > +#define FIRST_NVGPR 14 > +#endif > +#else > +#define FIRST_NVGPR 13 > +#endif > + > #ifdef __ASSEMBLY__ > > #define SZL (BITS_PER_LONG/8) > @@ -75,16 +85,21 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) > #ifdef __powerpc64__ > #define SAVE_GPR(n, base) std n,GPR0+8*(n)(base) > #define REST_GPR(n, base) ld n,GPR0+8*(n)(base) > +#ifdef CONFIG_PPC_BOOK3S > +#define SAVE_NVGPRS(base) SAVE_GPR(15, base); SAVE_2GPRS(16, base); SAVE_4GPRS(18, base); SAVE_10GPRS(22, base) > +#define REST_NVGPRS(base) REST_GPR(15, base); REST_2GPRS(16, base); REST_4GPRS(18, base); REST_10GPRS(22, base) > +#else /* CONFIG_PPC_BOOK3S */ > #define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) > #define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base) > -#else > +#endif /* CONFIG_PPC_BOOK3S */ > +#else /* __powerpc64__ */ > #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) > #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) > #define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); \ > SAVE_10GPRS(22, base) > #define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); \ > REST_10GPRS(22, base) > -#endif > +#endif /* __powerpc64__ */ > > #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) > #define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) > @@ -184,7 +199,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) > #ifdef CONFIG_PPC64 > > #define STACKFRAMESIZE 256 > -#define __STK_REG(i) (112 + ((i)-14)*8) > +#define __STK_REG(i) (112 + ((i)-15)*8) > #define STK_REG(i) __STK_REG(__REG_##i) > > #ifdef PPC64_ELF_ABI_v2 > diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c > index 709e23425317..49e849990f9f 100644 > --- 
a/arch/powerpc/kernel/asm-offsets.c > +++ b/arch/powerpc/kernel/asm-offsets.c > @@ -283,9 +283,9 @@ int main(void) > STACK_PT_REGS_OFFSET(GPR11, gpr[11]); > STACK_PT_REGS_OFFSET(GPR12, gpr[12]); > STACK_PT_REGS_OFFSET(GPR13, gpr[13]); > -#ifndef CONFIG_PPC64 > +#ifndef CONFIG_PPC_BOOK3E_64 > STACK_PT_REGS_OFFSET(GPR14, gpr[14]); > -#endif /* CONFIG_PPC64 */ > +#endif > /* > * Note: these symbols include _ because they overlap with special > * register names > diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S > index 8587059ad848..9ffea7c7764f 100644 > --- a/arch/powerpc/kernel/entry_32.S > +++ b/arch/powerpc/kernel/entry_32.S > @@ -451,8 +451,8 @@ ret_from_fork: > ret_from_kernel_thread: > REST_NVGPRS(r1) > bl schedule_tail > - mtlr r14 > - mr r3,r15 > + mtlr FIRST_NVGPR > + mr r3,FIRST_NVGPR+1 > PPC440EP_ERR42 > blrl > li r3,0 > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S > index b8b6069309da..8db0f378e8b0 100644 > --- a/arch/powerpc/kernel/entry_64.S > +++ b/arch/powerpc/kernel/entry_64.S > @@ -38,6 +38,7 @@ > #include <asm/tm.h> > #include <asm/ppc-opcode.h> > #include <asm/export.h> > +#include <asm/exception-64s.h> > > /* > * System calls. > @@ -405,7 +406,7 @@ _GLOBAL(save_nvgprs) > * The sigsuspend and rt_sigsuspend system calls can call do_signal > * and thus put the process into the stopped state where we might > * want to examine its user state with ptrace. Therefore we need > - * to save all the nonvolatile registers (r14 - r31) before calling > + * to save all the nonvolatile registers (r15 - r31) before calling > * the C code. Similarly, fork, vfork and clone need the full > * register state on the stack so that it can be copied to the child. 
> */ > @@ -449,10 +450,10 @@ _GLOBAL(ret_from_fork) > _GLOBAL(ret_from_kernel_thread) > bl schedule_tail > REST_NVGPRS(r1) > - mtlr r14 > - mr r3,r15 > + mtlr FIRST_NVGPR > + mr r3,FIRST_NVGPR+1 > #ifdef PPC64_ELF_ABI_v2 > - mr r12,r14 > + mr r12,FIRST_NVGPR > #endif > blrl > li r3,0 > @@ -481,9 +482,7 @@ _GLOBAL(_switch) > mflr r0 > std r0,16(r1) > stdu r1,-SWITCH_FRAME_SIZE(r1) > - /* r3-r13 are caller saved -- Cort */ > - SAVE_8GPRS(14, r1) > - SAVE_10GPRS(22, r1) > + SAVE_NVGPRS(r1) > std r0,_NIP(r1) /* Return to switch caller */ > mfcr r23 > std r23,_CCR(r1) > @@ -590,9 +589,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) > ld r6,_CCR(r1) > mtcrf 0xFF,r6 > > - /* r3-r13 are destroyed -- Cort */ > - REST_8GPRS(14, r1) > - REST_10GPRS(22, r1) > + /* Volatile regs are destroyed */ > + REST_NVGPRS(r1) > > /* convert old thread to its task_struct for return value */ > addi r3,r3,-THREAD > @@ -980,12 +978,14 @@ _GLOBAL(enter_rtas) > > /* Because RTAS is running in 32b mode, it clobbers the high order half > * of all registers that it saves. We therefore save those registers > - * RTAS might touch to the stack. (r0, r3-r13 are caller saved) > + * RTAS might touch to the stack. 
(r0, r3-r12 are caller saved) > */ > SAVE_GPR(2, r1) /* Save the TOC */ > SAVE_GPR(13, r1) /* Save paca */ > - SAVE_8GPRS(14, r1) /* Save the non-volatiles */ > - SAVE_10GPRS(22, r1) /* ditto */ > +#ifdef CONFIG_PPC_BOOK3S > + SAVE_GPR(14, r1) /* Save r14 */ > +#endif > + SAVE_NVGPRS(r1) /* Save the non-volatiles */ > > mfcr r4 > std r4,_CCR(r1) > @@ -1083,8 +1083,10 @@ rtas_restore_regs: > /* relocation is on at this point */ > REST_GPR(2, r1) /* Restore the TOC */ > REST_GPR(13, r1) /* Restore paca */ > - REST_8GPRS(14, r1) /* Restore the non-volatiles */ > - REST_10GPRS(22, r1) /* ditto */ > +#ifdef CONFIG_PPC_BOOK3S > + REST_GPR(14, r1) /* Restore r14 */ > +#endif > + REST_NVGPRS(r1) /* Restore the non-volatiles */ > > GET_PACA(r13) > > @@ -1114,12 +1116,14 @@ _GLOBAL(enter_prom) > > /* Because PROM is running in 32b mode, it clobbers the high order half > * of all registers that it saves. We therefore save those registers > - * PROM might touch to the stack. (r0, r3-r13 are caller saved) > + * PROM might touch to the stack. 
(r0, r3-r14 are caller saved) > */ > SAVE_GPR(2, r1) > SAVE_GPR(13, r1) > - SAVE_8GPRS(14, r1) > - SAVE_10GPRS(22, r1) > +#ifdef CONFIG_PPC_BOOK3S > + SAVE_GPR(14, r1) > +#endif > + SAVE_NVGPRS(r1) > mfcr r10 > mfmsr r11 > std r10,_CCR(r1) > @@ -1163,8 +1167,10 @@ _GLOBAL(enter_prom) > /* Restore other registers */ > REST_GPR(2, r1) > REST_GPR(13, r1) > - REST_8GPRS(14, r1) > - REST_10GPRS(22, r1) > +#ifdef CONFIG_PPC_BOOK3S > + REST_GPR(14, r1) > +#endif > + REST_NVGPRS(r1) > ld r4,_CCR(r1) > mtcr r4 > > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S > index cf6dd08493cb..5c1d10c09c4e 100644 > --- a/arch/powerpc/kernel/exceptions-64s.S > +++ b/arch/powerpc/kernel/exceptions-64s.S > @@ -1536,8 +1536,7 @@ BEGIN_FTR_SECTION > ld r10,EX_CFAR(r3) > std r10,ORIG_GPR3(r1) > END_FTR_SECTION_IFSET(CPU_FTR_CFAR) > - SAVE_8GPRS(14,r1) > - SAVE_10GPRS(22,r1) > + SAVE_NVGPRS(r1) > lhz r12,PACA_TRAP_SAVE(r13) > std r12,_TRAP(r1) > addi r11,r1,INT_FRAME_SIZE > diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S > index e43116237944..ffe46b5558e4 100644 > --- a/arch/powerpc/kernel/head_64.S > +++ b/arch/powerpc/kernel/head_64.S > @@ -796,9 +796,9 @@ __secondary_start: > /* Initialize the kernel stack */ > LOAD_REG_ADDR(r3, current_set) > sldi r28,r24,3 /* get current_set[cpu#] */ > - ldx r14,r3,r28 > - addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD > - std r14,PACAKSAVE(r13) > + ldx r15,r3,r28 > + addi r15,r15,THREAD_SIZE-STACK_FRAME_OVERHEAD > + std r15,PACAKSAVE(r13) > > /* Do early setup for that CPU (SLB and hash table pointer) */ > bl early_setup_secondary > @@ -807,7 +807,7 @@ __secondary_start: > * setup the new stack pointer, but *don't* use this until > * translation is on. 
> */ > - mr r1, r14 > + mr r1, r15 > > /* Clear backchain so we get nice backtraces */ > li r7,0 > diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S > index 07d4e0ad60db..8c84ab501236 100644 > --- a/arch/powerpc/kernel/idle_book3s.S > +++ b/arch/powerpc/kernel/idle_book3s.S > @@ -87,19 +87,19 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) > /* > * Used by threads when the lock bit of core_idle_state is set. > * Threads will spin in HMT_LOW until the lock bit is cleared. > - * r14 - pointer to core_idle_state > - * r15 - used to load contents of core_idle_state > + * r15 - pointer to core_idle_state > + * r16 - used to load contents of core_idle_state > * r9 - used as a temporary variable > */ > > core_idle_lock_held: > HMT_LOW > -3: lwz r15,0(r14) > - andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h > +3: lwz r16,0(r15) > + andis. r16,r16,PNV_CORE_IDLE_LOCK_BIT@h > bne 3b > HMT_MEDIUM > - lwarx r15,0,r14 > - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h > + lwarx r16,0,r15 > + andis. r9,r16,PNV_CORE_IDLE_LOCK_BIT@h > bne- core_idle_lock_held > blr > > @@ -209,21 +209,21 @@ pnv_enter_arch207_idle_mode: > 2: > /* Sleep or winkle */ > lbz r7,PACA_THREAD_MASK(r13) > - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) > + ld r15,PACA_CORE_IDLE_STATE_PTR(r13) > li r5,0 > beq cr3,3f > lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h > 3: > lwarx_loop1: > - lwarx r15,0,r14 > + lwarx r16,0,r15 > > - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h > + andis. r9,r16,PNV_CORE_IDLE_LOCK_BIT@h > bnel- core_idle_lock_held > > - add r15,r15,r5 /* Add if winkle */ > - andc r15,r15,r7 /* Clear thread bit */ > + add r16,r16,r5 /* Add if winkle */ > + andc r16,r16,r7 /* Clear thread bit */ > > - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS > + andi. r9,r16,PNV_CORE_IDLE_THREAD_BITS > > /* > * If cr0 = 0, then current thread is the last thread of the core entering > @@ -237,7 +237,7 @@ lwarx_loop1: > pnv_fastsleep_workaround_at_entry: > beq fastsleep_workaround_at_entry > > - stwcx. r15,0,r14 > + stwcx. 
r16,0,r15 > bne- lwarx_loop1 > isync > > @@ -246,8 +246,8 @@ common_enter: /* common code for all the threads entering sleep or winkle */ > IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) > > fastsleep_workaround_at_entry: > - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h > - stwcx. r15,0,r14 > + oris r16,r16,PNV_CORE_IDLE_LOCK_BIT@h > + stwcx. r16,0,r15 > bne- lwarx_loop1 > isync > > @@ -257,9 +257,9 @@ fastsleep_workaround_at_entry: > bl opal_config_cpu_idle_state > > /* Unlock */ > - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h > + xoris r16,r16,PNV_CORE_IDLE_LOCK_BIT@h > lwsync > - stw r15,0(r14) > + stw r16,0(r15) > b common_enter > > enter_winkle: > @@ -303,15 +303,15 @@ power_enter_stop: > * stack and enter stop > */ > lbz r7,PACA_THREAD_MASK(r13) > - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) > + ld r15,PACA_CORE_IDLE_STATE_PTR(r13) > > lwarx_loop_stop: > - lwarx r15,0,r14 > - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h > + lwarx r16,0,r15 > + andis. r9,r16,PNV_CORE_IDLE_LOCK_BIT@h > bnel- core_idle_lock_held > - andc r15,r15,r7 /* Clear thread bit */ > + andc r16,r16,r7 /* Clear thread bit */ > > - stwcx. r15,0,r14 > + stwcx. r16,0,r15 > bne- lwarx_loop_stop > isync > > @@ -567,14 +567,14 @@ pnv_wakeup_tb_loss: > * is required to return back to reset vector after hypervisor state > * restore is complete. > */ > - mr r18,r4 > - mflr r17 > - mfspr r16,SPRN_SRR1 > + mr r19,r4 > + mflr r18 > + mfspr r17,SPRN_SRR1 > BEGIN_FTR_SECTION > CHECK_HMI_INTERRUPT > END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) > > - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) > + ld r15,PACA_CORE_IDLE_STATE_PTR(r13) > lbz r7,PACA_THREAD_MASK(r13) > > /* > @@ -588,15 +588,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) > * In either case loop until the lock bit is cleared. > */ > 1: > - lwarx r15,0,r14 > - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h > + lwarx r16,0,r15 > + andis. r9,r16,PNV_CORE_IDLE_LOCK_BIT@h > bnel- core_idle_lock_held > - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h > - stwcx. 
r15,0,r14 > + oris r16,r16,PNV_CORE_IDLE_LOCK_BIT@h > + stwcx. r16,0,r15 > bne- 1b > isync > > - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS > + andi. r9,r16,PNV_CORE_IDLE_THREAD_BITS > cmpwi cr2,r9,0 > > /* > @@ -660,29 +660,29 @@ BEGIN_FTR_SECTION > * } > * > */ > - cmpwi r18,PNV_THREAD_WINKLE > + cmpwi r19,PNV_THREAD_WINKLE > bne 2f > - andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h > - subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h > + andis. r9,r16,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h > + subis r16,r16,PNV_CORE_IDLE_WINKLE_COUNT@h > beq 2f > - ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ > + ori r16,r16,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ > 2: > /* Shift thread bit to winkle mask, then test if this thread is set, > * and remove it from the winkle bits */ > slwi r8,r7,8 > - and r8,r8,r15 > - andc r15,r15,r8 > + and r8,r8,r16 > + andc r16,r16,r8 > cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */ > > lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) > - and r4,r4,r15 > + and r4,r4,r16 > cmpwi r4,0 /* Check if first in subcore */ > > - or r15,r15,r7 /* Set thread bit */ > + or r16,r16,r7 /* Set thread bit */ > beq first_thread_in_subcore > END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) > > - or r15,r15,r7 /* Set thread bit */ > + or r16,r16,r7 /* Set thread bit */ > beq cr2,first_thread_in_core > > /* Not first thread in core or subcore to wake up */ > @@ -758,9 +758,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) > mtspr SPRN_WORC,r4 > > clear_lock: > - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h > + xoris r16,r16,PNV_CORE_IDLE_LOCK_BIT@h > lwsync > - stw r15,0(r14) > + stw r16,0(r15) > > common_exit: > /* > @@ -814,8 +814,8 @@ no_segments: > > hypervisor_state_restored: > > - mtspr SPRN_SRR1,r16 > - mtlr r17 > + mtspr SPRN_SRR1,r17 > + mtlr r18 > blr /* return to pnv_powersave_wakeup */ > > fastsleep_workaround_at_exit: > diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c > index d645da302bf2..868835bb64c3 100644 
> --- a/arch/powerpc/kernel/process.c > +++ b/arch/powerpc/kernel/process.c > @@ -1478,12 +1478,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, > childregs->gpr[1] = sp + sizeof(struct pt_regs); > /* function */ > if (usp) > - childregs->gpr[14] = ppc_function_entry((void *)usp); > + childregs->gpr[FIRST_NVGPR] = ppc_function_entry((void *)usp); > #ifdef CONFIG_PPC64 > clear_tsk_thread_flag(p, TIF_32BIT); > childregs->softe = 1; > #endif > - childregs->gpr[15] = kthread_arg; > + childregs->gpr[FIRST_NVGPR + 1] = kthread_arg; > p->thread.regs = NULL; /* no user register state */ > ti->flags |= _TIF_RESTOREALL; > f = ret_from_kernel_thread; > diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S > index 3a2d04134da9..cc953bddeec4 100644 > --- a/arch/powerpc/kernel/tm.S > +++ b/arch/powerpc/kernel/tm.S > @@ -112,24 +112,24 @@ _GLOBAL(tm_reclaim) > SAVE_NVGPRS(r1) > > /* We need to setup MSR for VSX register save instructions. */ > - mfmsr r14 > - mr r15, r14 > - ori r15, r15, MSR_FP > - li r16, 0 > - ori r16, r16, MSR_EE /* IRQs hard off */ > - andc r15, r15, r16 > - oris r15, r15, MSR_VEC@h > + mfmsr r15 > + mr r16, r15 > + ori r16, r16, MSR_FP > + li r17, 0 > + ori r17, r17, MSR_EE /* IRQs hard off */ > + andc r16, r16, r17 > + oris r16, r16, MSR_VEC@h > #ifdef CONFIG_VSX > BEGIN_FTR_SECTION > - oris r15,r15, MSR_VSX@h > + oris r16,r16, MSR_VSX@h > END_FTR_SECTION_IFSET(CPU_FTR_VSX) > #endif > - mtmsrd r15 > - std r14, TM_FRAME_L0(r1) > + mtmsrd r16 > + std r15, TM_FRAME_L0(r1) > > /* Do sanity check on MSR to make sure we are suspended */ > li r7, (MSR_TS_S)@higher > - srdi r6, r14, 32 > + srdi r6, r15, 32 > and r6, r6, r7 > 1: tdeqi r6, 0 > EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 > @@ -291,11 +291,11 @@ dont_backup_fp: > /* AMR is checkpointed too, but is unsupported by Linux. 
*/ > > /* Restore original MSR/IRQ state & clear TM mode */ > - ld r14, TM_FRAME_L0(r1) /* Orig MSR */ > + ld r15, TM_FRAME_L0(r1) /* Orig MSR */ > > - li r15, 0 > - rldimi r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1 > - mtmsrd r14 > + li r16, 0 > + rldimi r15, r16, MSR_TS_LG, (63-MSR_TS_LG)-1 > + mtmsrd r15 > > REST_NVGPRS(r1) > > diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S > index 7c933a99f5d5..e1f7f4c6767a 100644 > --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S > +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S > @@ -72,7 +72,7 @@ _GLOBAL(ftrace_caller) > ld r5,0(r3) > > #ifdef CONFIG_LIVEPATCH > - mr r14,r7 /* remember old NIP */ > + mr r15,r7 /* remember old NIP */ > #endif > /* Calculate ip from nip-4 into r3 for call below */ > subi r3, r7, MCOUNT_INSN_SIZE > @@ -99,7 +99,7 @@ ftrace_call: > ld r3, _NIP(r1) > mtctr r3 > #ifdef CONFIG_LIVEPATCH > - cmpd r14,r3 /* has NIP been altered? */ > + cmpd r15,r3 /* has NIP been altered? */ > #endif > > /* Restore gprs */ > diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S > index 0fdc4a28970b..5d5a27c5c1ae 100644 > --- a/arch/powerpc/kvm/book3s_hv_interrupts.S > +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S > @@ -46,7 +46,7 @@ _GLOBAL(__kvmppc_vcore_entry) > /* Save host state to the stack */ > stdu r1, -SWITCH_FRAME_SIZE(r1) > > - /* Save non-volatile registers (r14 - r31) and CR */ > + /* Save non-volatile registers (r15 - r31) and CR */ > SAVE_NVGPRS(r1) > mfcr r3 > std r3, _CCR(r1) > @@ -145,9 +145,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) > * R2 = host R2 > * R12 = exit handler id > * R13 = PACA > + * R14 = ? 
XXX > */ > > - /* Restore non-volatile host registers (r14 - r31) and CR */ > + /* Restore non-volatile host registers (r15 - r31) and CR */ > REST_NVGPRS(r1) > ld r4, _CCR(r1) > mtcr r4 > diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S > index 47e06147c92c..4e1c4e560a3b 100644 > --- a/arch/powerpc/lib/checksum_64.S > +++ b/arch/powerpc/lib/checksum_64.S > @@ -65,9 +65,9 @@ _GLOBAL(__csum_partial) > mtctr r6 > > stdu r1,-STACKFRAMESIZE(r1) > - std r14,STK_REG(R14)(r1) > std r15,STK_REG(R15)(r1) > std r16,STK_REG(R16)(r1) > + std r17,STK_REG(R17)(r1) > > ld r6,0(r3) > ld r9,8(r3) > @@ -85,11 +85,11 @@ _GLOBAL(__csum_partial) > 2: > adde r0,r0,r6 > ld r12,32(r3) > - ld r14,40(r3) > + ld r15,40(r3) > > adde r0,r0,r9 > - ld r15,48(r3) > - ld r16,56(r3) > + ld r16,48(r3) > + ld r17,56(r3) > addi r3,r3,64 > > adde r0,r0,r10 > @@ -98,13 +98,13 @@ _GLOBAL(__csum_partial) > > adde r0,r0,r12 > > - adde r0,r0,r14 > - > adde r0,r0,r15 > + > + adde r0,r0,r16 > ld r6,0(r3) > ld r9,8(r3) > > - adde r0,r0,r16 > + adde r0,r0,r17 > ld r10,16(r3) > ld r11,24(r3) > bdnz 2b > @@ -112,23 +112,23 @@ _GLOBAL(__csum_partial) > > adde r0,r0,r6 > ld r12,32(r3) > - ld r14,40(r3) > + ld r15,40(r3) > > adde r0,r0,r9 > - ld r15,48(r3) > - ld r16,56(r3) > + ld r16,48(r3) > + ld r17,56(r3) > addi r3,r3,64 > > adde r0,r0,r10 > adde r0,r0,r11 > adde r0,r0,r12 > - adde r0,r0,r14 > adde r0,r0,r15 > adde r0,r0,r16 > + adde r0,r0,r17 > > - ld r14,STK_REG(R14)(r1) > ld r15,STK_REG(R15)(r1) > ld r16,STK_REG(R16)(r1) > + ld r17,STK_REG(R17)(r1) > addi r1,r1,STACKFRAMESIZE > > andi. 
r4,r4,63 > @@ -259,9 +259,9 @@ dstnr; sth r6,0(r4) > mtctr r6 > > stdu r1,-STACKFRAMESIZE(r1) > - std r14,STK_REG(R14)(r1) > std r15,STK_REG(R15)(r1) > std r16,STK_REG(R16)(r1) > + std r17,STK_REG(R17)(r1) > > source; ld r6,0(r3) > source; ld r9,8(r3) > @@ -279,11 +279,11 @@ source; ld r11,24(r3) > 2: > adde r0,r0,r6 > source; ld r12,32(r3) > -source; ld r14,40(r3) > +source; ld r15,40(r3) > > adde r0,r0,r9 > -source; ld r15,48(r3) > -source; ld r16,56(r3) > +source; ld r16,48(r3) > +source; ld r17,56(r3) > addi r3,r3,64 > > adde r0,r0,r10 > @@ -296,18 +296,18 @@ dest; std r11,24(r4) > > adde r0,r0,r12 > dest; std r12,32(r4) > -dest; std r14,40(r4) > +dest; std r15,40(r4) > > - adde r0,r0,r14 > -dest; std r15,48(r4) > -dest; std r16,56(r4) > + adde r0,r0,r15 > +dest; std r16,48(r4) > +dest; std r17,56(r4) > addi r4,r4,64 > > - adde r0,r0,r15 > + adde r0,r0,r16 > source; ld r6,0(r3) > source; ld r9,8(r3) > > - adde r0,r0,r16 > + adde r0,r0,r17 > source; ld r10,16(r3) > source; ld r11,24(r3) > bdnz 2b > @@ -315,11 +315,11 @@ source; ld r11,24(r3) > > adde r0,r0,r6 > source; ld r12,32(r3) > -source; ld r14,40(r3) > +source; ld r15,40(r3) > > adde r0,r0,r9 > -source; ld r15,48(r3) > -source; ld r16,56(r3) > +source; ld r16,48(r3) > +source; ld r17,56(r3) > addi r3,r3,64 > > adde r0,r0,r10 > @@ -332,19 +332,19 @@ dest; std r11,24(r4) > > adde r0,r0,r12 > dest; std r12,32(r4) > -dest; std r14,40(r4) > +dest; std r15,40(r4) > > - adde r0,r0,r14 > -dest; std r15,48(r4) > -dest; std r16,56(r4) > + adde r0,r0,r15 > +dest; std r16,48(r4) > +dest; std r17,56(r4) > addi r4,r4,64 > > - adde r0,r0,r15 > adde r0,r0,r16 > + adde r0,r0,r17 > > - ld r14,STK_REG(R14)(r1) > ld r15,STK_REG(R15)(r1) > ld r16,STK_REG(R16)(r1) > + ld r17,STK_REG(R17)(r1) > addi r1,r1,STACKFRAMESIZE > > andi. 
r5,r5,63 > @@ -407,9 +407,9 @@ dstnr; stb r6,0(r4) > blr > > .Lsrc_error: > - ld r14,STK_REG(R14)(r1) > ld r15,STK_REG(R15)(r1) > ld r16,STK_REG(R16)(r1) > + ld r17,STK_REG(R17)(r1) > addi r1,r1,STACKFRAMESIZE > .Lsrc_error_nr: > cmpdi 0,r7,0 > @@ -419,9 +419,9 @@ dstnr; stb r6,0(r4) > blr > > .Ldest_error: > - ld r14,STK_REG(R14)(r1) > ld r15,STK_REG(R15)(r1) > ld r16,STK_REG(R16)(r1) > + ld r17,STK_REG(R17)(r1) > addi r1,r1,STACKFRAMESIZE > .Ldest_error_nr: > cmpdi 0,r8,0 > diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S > index c517c27fe43c..8e65d4ea0ee4 100644 > --- a/arch/powerpc/lib/copypage_power7.S > +++ b/arch/powerpc/lib/copypage_power7.S > @@ -114,13 +114,13 @@ _GLOBAL(copypage_power7) > #endif > > .Lnonvmx_copy: > - std r14,STK_REG(R14)(r1) > std r15,STK_REG(R15)(r1) > std r16,STK_REG(R16)(r1) > std r17,STK_REG(R17)(r1) > std r18,STK_REG(R18)(r1) > std r19,STK_REG(R19)(r1) > std r20,STK_REG(R20)(r1) > + std r21,STK_REG(R21)(r1) > > 1: ld r0,0(r4) > ld r5,8(r4) > @@ -131,13 +131,13 @@ _GLOBAL(copypage_power7) > ld r10,48(r4) > ld r11,56(r4) > ld r12,64(r4) > - ld r14,72(r4) > - ld r15,80(r4) > - ld r16,88(r4) > - ld r17,96(r4) > - ld r18,104(r4) > - ld r19,112(r4) > - ld r20,120(r4) > + ld r15,72(r4) > + ld r16,80(r4) > + ld r17,88(r4) > + ld r18,96(r4) > + ld r19,104(r4) > + ld r20,112(r4) > + ld r21,120(r4) > addi r4,r4,128 > std r0,0(r3) > std r5,8(r3) > @@ -148,22 +148,22 @@ _GLOBAL(copypage_power7) > std r10,48(r3) > std r11,56(r3) > std r12,64(r3) > - std r14,72(r3) > - std r15,80(r3) > - std r16,88(r3) > - std r17,96(r3) > - std r18,104(r3) > - std r19,112(r3) > - std r20,120(r3) > + std r15,72(r3) > + std r16,80(r3) > + std r17,88(r3) > + std r18,96(r3) > + std r19,104(r3) > + std r20,112(r3) > + std r21,120(r3) > addi r3,r3,128 > bdnz 1b > > - ld r14,STK_REG(R14)(r1) > ld r15,STK_REG(R15)(r1) > ld r16,STK_REG(R16)(r1) > ld r17,STK_REG(R17)(r1) > ld r18,STK_REG(R18)(r1) > ld r19,STK_REG(R19)(r1) > ld 
r20,STK_REG(R20)(r1) > + ld r21,STK_REG(R21)(r1) > addi r1,r1,STACKFRAMESIZE > blr > diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S > index 5d6ccd75b433..8f17ad74da16 100644 > --- a/arch/powerpc/lib/copyuser_power7.S > +++ b/arch/powerpc/lib/copyuser_power7.S > @@ -51,9 +51,9 @@ > > > .Ldo_err4: > - ld r16,STK_REG(R16)(r1) > - ld r15,STK_REG(R15)(r1) > - ld r14,STK_REG(R14)(r1) > + ld r17,STK_REG(R16)(r1) > + ld r16,STK_REG(R15)(r1) > + ld r15,STK_REG(R14)(r1) > .Ldo_err3: > bl exit_vmx_usercopy > ld r0,STACKFRAMESIZE+16(r1) > @@ -62,15 +62,15 @@ > #endif /* CONFIG_ALTIVEC */ > > .Ldo_err2: > - ld r22,STK_REG(R22)(r1) > - ld r21,STK_REG(R21)(r1) > - ld r20,STK_REG(R20)(r1) > - ld r19,STK_REG(R19)(r1) > - ld r18,STK_REG(R18)(r1) > - ld r17,STK_REG(R17)(r1) > - ld r16,STK_REG(R16)(r1) > - ld r15,STK_REG(R15)(r1) > - ld r14,STK_REG(R14)(r1) > + ld r23,STK_REG(R22)(r1) > + ld r22,STK_REG(R21)(r1) > + ld r21,STK_REG(R20)(r1) > + ld r20,STK_REG(R19)(r1) > + ld r19,STK_REG(R18)(r1) > + ld r18,STK_REG(R17)(r1) > + ld r17,STK_REG(R16)(r1) > + ld r16,STK_REG(R15)(r1) > + ld r15,STK_REG(R14)(r1) > .Lexit: > addi r1,r1,STACKFRAMESIZE > .Ldo_err1: > @@ -131,15 +131,15 @@ err1; stw r0,0(r3) > > mflr r0 > stdu r1,-STACKFRAMESIZE(r1) > - std r14,STK_REG(R14)(r1) > - std r15,STK_REG(R15)(r1) > - std r16,STK_REG(R16)(r1) > - std r17,STK_REG(R17)(r1) > - std r18,STK_REG(R18)(r1) > - std r19,STK_REG(R19)(r1) > - std r20,STK_REG(R20)(r1) > - std r21,STK_REG(R21)(r1) > - std r22,STK_REG(R22)(r1) > + std r15,STK_REG(R14)(r1) > + std r16,STK_REG(R15)(r1) > + std r17,STK_REG(R16)(r1) > + std r18,STK_REG(R17)(r1) > + std r19,STK_REG(R18)(r1) > + std r20,STK_REG(R19)(r1) > + std r21,STK_REG(R20)(r1) > + std r22,STK_REG(R21)(r1) > + std r23,STK_REG(R22)(r1) > std r0,STACKFRAMESIZE+16(r1) > > srdi r6,r5,7 > @@ -156,14 +156,14 @@ err2; ld r9,32(r4) > err2; ld r10,40(r4) > err2; ld r11,48(r4) > err2; ld r12,56(r4) > -err2; ld r14,64(r4) > -err2; ld 
r15,72(r4) > -err2; ld r16,80(r4) > -err2; ld r17,88(r4) > -err2; ld r18,96(r4) > -err2; ld r19,104(r4) > -err2; ld r20,112(r4) > -err2; ld r21,120(r4) > +err2; ld r15,64(r4) > +err2; ld r16,72(r4) > +err2; ld r17,80(r4) > +err2; ld r18,88(r4) > +err2; ld r19,96(r4) > +err2; ld r20,104(r4) > +err2; ld r21,112(r4) > +err2; ld r22,120(r4) > addi r4,r4,128 > err2; std r0,0(r3) > err2; std r6,8(r3) > @@ -173,28 +173,28 @@ err2; std r9,32(r3) > err2; std r10,40(r3) > err2; std r11,48(r3) > err2; std r12,56(r3) > -err2; std r14,64(r3) > -err2; std r15,72(r3) > -err2; std r16,80(r3) > -err2; std r17,88(r3) > -err2; std r18,96(r3) > -err2; std r19,104(r3) > -err2; std r20,112(r3) > -err2; std r21,120(r3) > +err2; std r15,64(r3) > +err2; std r16,72(r3) > +err2; std r17,80(r3) > +err2; std r18,88(r3) > +err2; std r19,96(r3) > +err2; std r20,104(r3) > +err2; std r21,112(r3) > +err2; std r22,120(r3) > addi r3,r3,128 > bdnz 4b > > clrldi r5,r5,(64-7) > > - ld r14,STK_REG(R14)(r1) > - ld r15,STK_REG(R15)(r1) > - ld r16,STK_REG(R16)(r1) > - ld r17,STK_REG(R17)(r1) > - ld r18,STK_REG(R18)(r1) > - ld r19,STK_REG(R19)(r1) > - ld r20,STK_REG(R20)(r1) > - ld r21,STK_REG(R21)(r1) > - ld r22,STK_REG(R22)(r1) > + ld r15,STK_REG(R14)(r1) > + ld r16,STK_REG(R15)(r1) > + ld r17,STK_REG(R16)(r1) > + ld r18,STK_REG(R17)(r1) > + ld r19,STK_REG(R18)(r1) > + ld r20,STK_REG(R19)(r1) > + ld r21,STK_REG(R20)(r1) > + ld r22,STK_REG(R21)(r1) > + ld r23,STK_REG(R22)(r1) > addi r1,r1,STACKFRAMESIZE > > /* Up to 127B to go */ > @@ -405,14 +405,14 @@ err3; stvx v0,r3,r11 > 7: sub r5,r5,r6 > srdi r6,r5,7 > > - std r14,STK_REG(R14)(r1) > - std r15,STK_REG(R15)(r1) > - std r16,STK_REG(R16)(r1) > + std r15,STK_REG(R14)(r1) > + std r16,STK_REG(R15)(r1) > + std r17,STK_REG(R16)(r1) > > li r12,64 > - li r14,80 > - li r15,96 > - li r16,112 > + li r15,80 > + li r16,96 > + li r17,112 > > mtctr r6 > > @@ -427,24 +427,24 @@ err4; lvx v6,r4,r9 > err4; lvx v5,r4,r10 > err4; lvx v4,r4,r11 > err4; lvx v3,r4,r12 > -err4; 
lvx v2,r4,r14 > -err4; lvx v1,r4,r15 > -err4; lvx v0,r4,r16 > +err4; lvx v2,r4,r15 > +err4; lvx v1,r4,r16 > +err4; lvx v0,r4,r17 > addi r4,r4,128 > err4; stvx v7,r0,r3 > err4; stvx v6,r3,r9 > err4; stvx v5,r3,r10 > err4; stvx v4,r3,r11 > err4; stvx v3,r3,r12 > -err4; stvx v2,r3,r14 > -err4; stvx v1,r3,r15 > -err4; stvx v0,r3,r16 > +err4; stvx v2,r3,r15 > +err4; stvx v1,r3,r16 > +err4; stvx v0,r3,r17 > addi r3,r3,128 > bdnz 8b > > - ld r14,STK_REG(R14)(r1) > - ld r15,STK_REG(R15)(r1) > - ld r16,STK_REG(R16)(r1) > + ld r15,STK_REG(R14)(r1) > + ld r16,STK_REG(R15)(r1) > + ld r17,STK_REG(R16)(r1) > > /* Up to 127B to go */ > clrldi r5,r5,(64-7) > @@ -590,14 +590,14 @@ err3; stvx v11,r3,r11 > 7: sub r5,r5,r6 > srdi r6,r5,7 > > - std r14,STK_REG(R14)(r1) > - std r15,STK_REG(R15)(r1) > - std r16,STK_REG(R16)(r1) > + std r15,STK_REG(R14)(r1) > + std r16,STK_REG(R15)(r1) > + std r17,STK_REG(R16)(r1) > > li r12,64 > - li r14,80 > - li r15,96 > - li r16,112 > + li r15,80 > + li r16,96 > + li r17,112 > > mtctr r6 > > @@ -617,11 +617,11 @@ err4; lvx v4,r4,r11 > VPERM(v11,v5,v4,v16) > err4; lvx v3,r4,r12 > VPERM(v12,v4,v3,v16) > -err4; lvx v2,r4,r14 > +err4; lvx v2,r4,r15 > VPERM(v13,v3,v2,v16) > -err4; lvx v1,r4,r15 > +err4; lvx v1,r4,r16 > VPERM(v14,v2,v1,v16) > -err4; lvx v0,r4,r16 > +err4; lvx v0,r4,r17 > VPERM(v15,v1,v0,v16) > addi r4,r4,128 > err4; stvx v8,r0,r3 > @@ -629,15 +629,15 @@ err4; stvx v9,r3,r9 > err4; stvx v10,r3,r10 > err4; stvx v11,r3,r11 > err4; stvx v12,r3,r12 > -err4; stvx v13,r3,r14 > -err4; stvx v14,r3,r15 > -err4; stvx v15,r3,r16 > +err4; stvx v13,r3,r15 > +err4; stvx v14,r3,r16 > +err4; stvx v15,r3,r17 > addi r3,r3,128 > bdnz 8b > > - ld r14,STK_REG(R14)(r1) > - ld r15,STK_REG(R15)(r1) > - ld r16,STK_REG(R16)(r1) > + ld r15,STK_REG(R14)(r1) > + ld r16,STK_REG(R15)(r1) > + ld r17,STK_REG(R16)(r1) > > /* Up to 127B to go */ > clrldi r5,r5,(64-7) > diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S > index 7e5e1c28e56a..c46ad2f0a718 
100644 > --- a/arch/powerpc/lib/crtsavres.S > +++ b/arch/powerpc/lib/crtsavres.S > @@ -314,9 +314,12 @@ _GLOBAL(_restvr_31) > > #else /* CONFIG_PPC64 */ > > +/* 64-bit has -ffixed-r13, Book3S also has -ffixed-r14 */ > +#ifdef CONFIG_PPC_BOOK3E > .globl _savegpr0_14 > _savegpr0_14: > std r14,-144(r1) > +#endif > .globl _savegpr0_15 > _savegpr0_15: > std r15,-136(r1) > diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S > index 95ca426637eb..6c0684e5e0d3 100644 > --- a/arch/powerpc/lib/memcpy_power7.S > +++ b/arch/powerpc/lib/memcpy_power7.S > @@ -76,7 +76,6 @@ _GLOBAL(memcpy_power7) > > mflr r0 > stdu r1,-STACKFRAMESIZE(r1) > - std r14,STK_REG(R14)(r1) > std r15,STK_REG(R15)(r1) > std r16,STK_REG(R16)(r1) > std r17,STK_REG(R17)(r1) > @@ -85,6 +84,7 @@ _GLOBAL(memcpy_power7) > std r20,STK_REG(R20)(r1) > std r21,STK_REG(R21)(r1) > std r22,STK_REG(R22)(r1) > + std r23,STK_REG(R23)(r1) > std r0,STACKFRAMESIZE+16(r1) > > srdi r6,r5,7 > @@ -101,14 +101,14 @@ _GLOBAL(memcpy_power7) > ld r10,40(r4) > ld r11,48(r4) > ld r12,56(r4) > - ld r14,64(r4) > - ld r15,72(r4) > - ld r16,80(r4) > - ld r17,88(r4) > - ld r18,96(r4) > - ld r19,104(r4) > - ld r20,112(r4) > - ld r21,120(r4) > + ld r15,64(r4) > + ld r16,72(r4) > + ld r17,80(r4) > + ld r18,88(r4) > + ld r19,96(r4) > + ld r20,104(r4) > + ld r21,112(r4) > + ld r22,120(r4) > addi r4,r4,128 > std r0,0(r3) > std r6,8(r3) > @@ -118,20 +118,19 @@ _GLOBAL(memcpy_power7) > std r10,40(r3) > std r11,48(r3) > std r12,56(r3) > - std r14,64(r3) > - std r15,72(r3) > - std r16,80(r3) > - std r17,88(r3) > - std r18,96(r3) > - std r19,104(r3) > - std r20,112(r3) > - std r21,120(r3) > + std r15,64(r3) > + std r16,72(r3) > + std r17,80(r3) > + std r18,88(r3) > + std r19,96(r3) > + std r20,104(r3) > + std r21,112(r3) > + std r22,120(r3) > addi r3,r3,128 > bdnz 4b > > clrldi r5,r5,(64-7) > > - ld r14,STK_REG(R14)(r1) > ld r15,STK_REG(R15)(r1) > ld r16,STK_REG(R16)(r1) > ld r17,STK_REG(R17)(r1) > @@ -140,6 +139,7 @@ 
_GLOBAL(memcpy_power7) > ld r20,STK_REG(R20)(r1) > ld r21,STK_REG(R21)(r1) > ld r22,STK_REG(R22)(r1) > + ld r23,STK_REG(R23)(r1) > addi r1,r1,STACKFRAMESIZE > > /* Up to 127B to go */ > @@ -350,14 +350,14 @@ _GLOBAL(memcpy_power7) > 7: sub r5,r5,r6 > srdi r6,r5,7 > > - std r14,STK_REG(R14)(r1) > std r15,STK_REG(R15)(r1) > std r16,STK_REG(R16)(r1) > + std r17,STK_REG(R17)(r1) > > li r12,64 > - li r14,80 > - li r15,96 > - li r16,112 > + li r15,80 > + li r16,96 > + li r17,112 > > mtctr r6 > > @@ -372,24 +372,24 @@ _GLOBAL(memcpy_power7) > lvx v5,r4,r10 > lvx v4,r4,r11 > lvx v3,r4,r12 > - lvx v2,r4,r14 > - lvx v1,r4,r15 > - lvx v0,r4,r16 > + lvx v2,r4,r15 > + lvx v1,r4,r16 > + lvx v0,r4,r17 > addi r4,r4,128 > stvx v7,r0,r3 > stvx v6,r3,r9 > stvx v5,r3,r10 > stvx v4,r3,r11 > stvx v3,r3,r12 > - stvx v2,r3,r14 > - stvx v1,r3,r15 > - stvx v0,r3,r16 > + stvx v2,r3,r15 > + stvx v1,r3,r16 > + stvx v0,r3,r17 > addi r3,r3,128 > bdnz 8b > > - ld r14,STK_REG(R14)(r1) > ld r15,STK_REG(R15)(r1) > ld r16,STK_REG(R16)(r1) > + ld r17,STK_REG(R17)(r1) > > /* Up to 127B to go */ > clrldi r5,r5,(64-7) > @@ -536,14 +536,14 @@ _GLOBAL(memcpy_power7) > 7: sub r5,r5,r6 > srdi r6,r5,7 > > - std r14,STK_REG(R14)(r1) > std r15,STK_REG(R15)(r1) > std r16,STK_REG(R16)(r1) > + std r17,STK_REG(R17)(r1) > > li r12,64 > - li r14,80 > - li r15,96 > - li r16,112 > + li r15,80 > + li r16,96 > + li r17,112 > > mtctr r6 > > @@ -563,11 +563,11 @@ _GLOBAL(memcpy_power7) > VPERM(v11,v5,v4,v16) > lvx v3,r4,r12 > VPERM(v12,v4,v3,v16) > - lvx v2,r4,r14 > + lvx v2,r4,r15 > VPERM(v13,v3,v2,v16) > - lvx v1,r4,r15 > + lvx v1,r4,r16 > VPERM(v14,v2,v1,v16) > - lvx v0,r4,r16 > + lvx v0,r4,r17 > VPERM(v15,v1,v0,v16) > addi r4,r4,128 > stvx v8,r0,r3 > @@ -575,15 +575,15 @@ _GLOBAL(memcpy_power7) > stvx v10,r3,r10 > stvx v11,r3,r11 > stvx v12,r3,r12 > - stvx v13,r3,r14 > - stvx v14,r3,r15 > - stvx v15,r3,r16 > + stvx v13,r3,r15 > + stvx v14,r3,r16 > + stvx v15,r3,r17 > addi r3,r3,128 > bdnz 8b > > - ld 
r14,STK_REG(R14)(r1) > ld r15,STK_REG(R15)(r1) > ld r16,STK_REG(R16)(r1) > + ld r17,STK_REG(R17)(r1) > > /* Up to 127B to go */ > clrldi r5,r5,(64-7) > diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h > index a8cd7e289ecd..52a30db033c1 100644 > --- a/arch/powerpc/net/bpf_jit32.h > +++ b/arch/powerpc/net/bpf_jit32.h > @@ -44,9 +44,11 @@ > * A register r4 > * X register r5 > * addr param r6 > - * r7-r10 scratch > - * skb->data r14 > - * skb headlen r15 (skb->len - skb->data_len) > + * scratch r7-r8 > + * skb headlen r9 (skb->len - skb->data_len) > + * skb->data r10 > + * fixed regs r13-r14 > + * unused r15 > * m[0] r16 > * m[...] ... > * m[15] r31 > @@ -58,8 +60,8 @@ > #define r_addr 6 > #define r_scratch1 7 > #define r_scratch2 8 > -#define r_D 14 > -#define r_HL 15 > +#define r_HL 9 > +#define r_D 10 > #define r_M 16 > > #ifndef __ASSEMBLY__ > diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S > index 3dd9c43d40c9..5b06152052f6 100644 > --- a/arch/powerpc/net/bpf_jit_asm.S > +++ b/arch/powerpc/net/bpf_jit_asm.S > @@ -19,8 +19,8 @@ > * r3 skb > * r4,r5 A,X > * r6 *** address parameter to helper *** > - * r7-r10 scratch > - * r14 skb->data > + * r7-r9 scratch > + * r10 skb->data > * r15 skb headlen > * r16-31 M[] > */ ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC][PATCH] powerpc/64s: stop using r14 register 2017-05-21 22:09 ` Benjamin Herrenschmidt @ 2017-05-22 0:29 ` Nicholas Piggin 2017-05-22 4:02 ` Michael Ellerman 0 siblings, 1 reply; 7+ messages in thread From: Nicholas Piggin @ 2017-05-22 0:29 UTC (permalink / raw) To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Anton Blanchard On Mon, 22 May 2017 08:09:19 +1000 Benjamin Herrenschmidt <benh@au1.ibm.com> wrote: > On Mon, 2017-05-22 at 00:00 +1000, Nicholas Piggin wrote: > > I'd like to take over the r14 register for use as a per-cpu kernel > > register similar to the way r13 is used for the paca. > > Why not use r13 instead ? We don't need to access the PACA that often > from C code, I thought we could flip them... It ended up being a bit too tricky to do it that way. We can't get directly to per-CPU data from the per-cpu data offset in exception entry code for a number of reasons. So we end up having to load the paca first. I looked at swapping r13 from paca to per-cpu offset at the same time the stack is set up, so paca is used for early save areas then per-cpu is used in C code. In practice it ended up getting too tricky and fragile dealing with nested interrupts, machine checks, etc. I think it's something we might slowly work towards (consolidating back to one fixed register), but as things are it didn't work well. It's easy to drop the fixed r14 in future if we can. None of the incidental asm users of r14 were complicated by moving them to another register. Thanks, Nick ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC][PATCH] powerpc/64s: stop using r14 register 2017-05-22 0:29 ` Nicholas Piggin @ 2017-05-22 4:02 ` Michael Ellerman 0 siblings, 0 replies; 7+ messages in thread From: Michael Ellerman @ 2017-05-22 4:02 UTC (permalink / raw) To: Nicholas Piggin, Benjamin Herrenschmidt; +Cc: linuxppc-dev, Anton Blanchard Nicholas Piggin <npiggin@gmail.com> writes: > On Mon, 22 May 2017 08:09:19 +1000 > Benjamin Herrenschmidt <benh@au1.ibm.com> wrote: > >> On Mon, 2017-05-22 at 00:00 +1000, Nicholas Piggin wrote: >> > I'd like to take over the r14 register for use as a per-cpu kernel >> > register similar to the way r13 is used for the paca. >> >> Why not use r13 instead ? We don't need to access the PACA that often >> from C code, I thought we could flip them... > > It ended up being a bit too tricky to do it that way. We can't get > directly to per-CPU data from the per-cpu data offset in exception > entry code for a number of reasons. So we end up having to load the > paca first. > > I looked at swapping r13 from paca to per-cpu offset at the same > time the stack is set up, so paca is used for early save areas then > per-cpu is used in C code. In practice it ended up getting too > tricky and fragile dealing with nested interrupts, machine checks, > etc. > > I think it's something we might slowly work towards (consolidating > back to one fixed register), but as things are it didn't work well. Yep, agree 100%. We've talked about flipping r13 for 10 years, rather than spending another 10 this is a good way to evolve things in the right direction. cheers ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC][PATCH] powerpc/64s: stop using r14 register 2017-05-21 14:00 [RFC][PATCH] powerpc/64s: stop using r14 register Nicholas Piggin 2017-05-21 22:09 ` Benjamin Herrenschmidt @ 2017-05-30 19:08 ` Naveen N. Rao 2017-05-31 0:53 ` Nicholas Piggin 1 sibling, 1 reply; 7+ messages in thread From: Naveen N. Rao @ 2017-05-30 19:08 UTC (permalink / raw) To: Nicholas Piggin; +Cc: linuxppc-dev, Anton Blanchard On 2017/05/22 12:00AM, Nicholas Piggin wrote: > I'd like to take over the r14 register for use as a per-cpu kernel > register similar to the way r13 is used for the paca. > > r14 being the last non-volatile register gcc allocates, appears with > about 0.5% the frequency as r31 in (static) instructions. I haven't > counted dynamically how many extra spills and fills that removing it > causes, but I should. My guess is the memory ops saved by using > it as a per-cpu variable will significantly outweigh the cost of > losing it as a general use register. > > This part of the patch is pretty mechanical. A couple of places (prom) > still have to use it, and I haven't quite understood the KVM code yet. > > Question is whether this approach seems okay, and whether we should do > the same for 64e. > > Thanks, > Nick > > --- [snip] > diff --git a/arch/powerpc/net/bpf_jit32.h > b/arch/powerpc/net/bpf_jit32.h > index a8cd7e289ecd..52a30db033c1 100644 > --- a/arch/powerpc/net/bpf_jit32.h > +++ b/arch/powerpc/net/bpf_jit32.h > @@ -44,9 +44,11 @@ > * A register r4 > * X register r5 > * addr param r6 > - * r7-r10 scratch > - * skb->data r14 > - * skb headlen r15 (skb->len - skb->data_len) > + * scratch r7-r8 > + * skb headlen r9 (skb->len - skb->data_len) > + * skb->data r10 > + * fixed regs r13-r14 > + * unused r15 > * m[0] r16 > * m[...] ... 
> * m[15] r31 > @@ -58,8 +60,8 @@ > #define r_addr 6 > #define r_scratch1 7 > #define r_scratch2 8 > -#define r_D 14 > -#define r_HL 15 > +#define r_HL 9 > +#define r_D 10 You'll also need changes in the JIT code itself, at least in bpf_jit_build_prologue() and elsewhere -- some code expects r_D and r_HL to be NVRs. It's probably easier to just choose other NVRs here... > #define r_M 16 > > #ifndef __ASSEMBLY__ > diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S > index 3dd9c43d40c9..5b06152052f6 100644 > --- a/arch/powerpc/net/bpf_jit_asm.S > +++ b/arch/powerpc/net/bpf_jit_asm.S > @@ -19,8 +19,8 @@ > * r3 skb > * r4,r5 A,X > * r6 *** address parameter to helper *** > - * r7-r10 scratch > - * r14 skb->data > + * r7-r9 scratch > + * r10 skb->data > * r15 skb headlen > * r16-31 M[] This doesn't match the updates to bpf_jit32.h. - Naveen ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC][PATCH] powerpc/64s: stop using r14 register 2017-05-30 19:08 ` Naveen N. Rao @ 2017-05-31 0:53 ` Nicholas Piggin 2017-05-31 5:55 ` Naveen N. Rao 0 siblings, 1 reply; 7+ messages in thread From: Nicholas Piggin @ 2017-05-31 0:53 UTC (permalink / raw) To: Naveen N. Rao; +Cc: linuxppc-dev, Anton Blanchard On Wed, 31 May 2017 00:38:17 +0530 "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> wrote: > On 2017/05/22 12:00AM, Nicholas Piggin wrote: > > I'd like to take over the r14 register for use as a per-cpu kernel > > register similar to the way r13 is used for the paca. > > > > r14 being the last non-volatile register gcc allocates, appears with > > about 0.5% the frequency as r31 in (static) instructions. I haven't > > counted dynamically how many extra spills and fills that removing it > > causes, but I should. My guess is the memory ops saved by using > > it as a per-cpu variable will significantly outweigh the cost of > > losing it as a general use register. > > > > This part of the patch is pretty mechanical. A couple of places (prom) > > still have to use it, and I haven't quite understood the KVM code yet. > > > > Question is whether this approach seems okay, and whether we should do > > the same for 64e. > > > > Thanks, > > Nick > > > > --- > > [snip] > > > diff --git a/arch/powerpc/net/bpf_jit32.h > > b/arch/powerpc/net/bpf_jit32.h > > index a8cd7e289ecd..52a30db033c1 100644 > > --- a/arch/powerpc/net/bpf_jit32.h > > +++ b/arch/powerpc/net/bpf_jit32.h > > @@ -44,9 +44,11 @@ > > * A register r4 > > * X register r5 > > * addr param r6 > > - * r7-r10 scratch > > - * skb->data r14 > > - * skb headlen r15 (skb->len - skb->data_len) > > + * scratch r7-r8 > > + * skb headlen r9 (skb->len - skb->data_len) > > + * skb->data r10 > > + * fixed regs r13-r14 > > + * unused r15 > > * m[0] r16 > > * m[...] ... 
> > * m[15] r31 > > @@ -58,8 +60,8 @@ > > #define r_addr 6 > > #define r_scratch1 7 > > #define r_scratch2 8 > > -#define r_D 14 > > -#define r_HL 15 > > +#define r_HL 9 > > +#define r_D 10 > > You'll also need changes in the JIT code itself, at least in > bpf_jit_build_prologue() and elsewhere -- some code expects r_D and r_HL > to be NVRs. It's probably easier to just choose other NVRs here... Thanks for taking a look. We're out of non volatile registers here, however... This is for PPC32 only by the looks, so the patch is not required at all. I should have looked a bit more closely. Thanks, Nick ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC][PATCH] powerpc/64s: stop using r14 register 2017-05-31 0:53 ` Nicholas Piggin @ 2017-05-31 5:55 ` Naveen N. Rao 0 siblings, 0 replies; 7+ messages in thread From: Naveen N. Rao @ 2017-05-31 5:55 UTC (permalink / raw) To: Nicholas Piggin; +Cc: linuxppc-dev, Anton Blanchard On 2017/05/31 10:53AM, Nicholas Piggin wrote: > On Wed, 31 May 2017 00:38:17 +0530 > "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> wrote: > > > On 2017/05/22 12:00AM, Nicholas Piggin wrote: > > > I'd like to take over the r14 register for use as a per-cpu kernel > > > register similar to the way r13 is used for the paca. > > > > > > r14 being the last non-volatile register gcc allocates, appears with > > > about 0.5% the frequency as r31 in (static) instructions. I haven't > > > counted dynamically how many extra spills and fills that removing it > > > causes, but I should. My guess is the memory ops saved by using > > > it as a per-cpu variable will significantly outweigh the cost of > > > losing it as a general use register. > > > > > > This part of the patch is pretty mechanical. A couple of places (prom) > > > still have to use it, and I haven't quite understood the KVM code yet. > > > > > > Question is whether this approach seems okay, and whether we should do > > > the same for 64e. > > > > > > Thanks, > > > Nick > > > > > > --- > > > > [snip] > > > > > diff --git a/arch/powerpc/net/bpf_jit32.h > > > b/arch/powerpc/net/bpf_jit32.h > > > index a8cd7e289ecd..52a30db033c1 100644 > > > --- a/arch/powerpc/net/bpf_jit32.h > > > +++ b/arch/powerpc/net/bpf_jit32.h > > > @@ -44,9 +44,11 @@ > > > * A register r4 > > > * X register r5 > > > * addr param r6 > > > - * r7-r10 scratch > > > - * skb->data r14 > > > - * skb headlen r15 (skb->len - skb->data_len) > > > + * scratch r7-r8 > > > + * skb headlen r9 (skb->len - skb->data_len) > > > + * skb->data r10 > > > + * fixed regs r13-r14 > > > + * unused r15 > > > * m[0] r16 > > > * m[...] ... 
> > > * m[15] r31 > > > @@ -58,8 +60,8 @@ > > > #define r_addr 6 > > > #define r_scratch1 7 > > > #define r_scratch2 8 > > > -#define r_D 14 > > > -#define r_HL 15 > > > +#define r_HL 9 > > > +#define r_D 10 > > > > You'll also need changes in the JIT code itself, at least in > > bpf_jit_build_prologue() and elsewhere -- some code expects r_D and r_HL > > to be NVRs. It's probably easier to just choose other NVRs here... > > Thanks for taking a look. We're out of non volatile registers here, > however... This is for PPC32 only by the looks, so the patch is not > required at all. Ah, indeed. Good catch. - Naveen ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2017-05-31 5:56 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-05-21 14:00 [RFC][PATCH] powerpc/64s: stop using r14 register Nicholas Piggin 2017-05-21 22:09 ` Benjamin Herrenschmidt 2017-05-22 0:29 ` Nicholas Piggin 2017-05-22 4:02 ` Michael Ellerman 2017-05-30 19:08 ` Naveen N. Rao 2017-05-31 0:53 ` Nicholas Piggin 2017-05-31 5:55 ` Naveen N. Rao
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.