* [PATCH 0/2] Add POWER9 DD2.0 feature, remove idle workarounds in DD2.1 @ 2017-10-20 4:54 Nicholas Piggin 2017-10-20 4:54 ` [PATCH 1/2] powerpc: add POWER9_DD20 feature Nicholas Piggin 2017-10-20 4:54 ` [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 Nicholas Piggin 0 siblings, 2 replies; 4+ messages in thread From: Nicholas Piggin @ 2017-10-20 4:54 UTC (permalink / raw) To: linuxppc-dev; +Cc: Nicholas Piggin Nicholas Piggin (2): powerpc: add POWER9_DD20 feature powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 arch/powerpc/include/asm/cputable.h | 5 ++++- arch/powerpc/kernel/cputable.c | 20 +++++++++++++++++++ arch/powerpc/kernel/dt_cpu_ftrs.c | 2 ++ arch/powerpc/kernel/idle_book3s.S | 39 ++++++++++++++++++++++++++----------- 4 files changed, 54 insertions(+), 12 deletions(-) -- 2.13.3 ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] powerpc: add POWER9_DD20 feature 2017-10-20 4:54 [PATCH 0/2] Add POWER9 DD2.0 feature, remove idle workarounds in DD2.1 Nicholas Piggin @ 2017-10-20 4:54 ` Nicholas Piggin 2017-10-20 4:54 ` [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 Nicholas Piggin 1 sibling, 0 replies; 4+ messages in thread From: Nicholas Piggin @ 2017-10-20 4:54 UTC (permalink / raw) To: linuxppc-dev; +Cc: Nicholas Piggin, Michael Neuling Cc: Michael Neuling <mikey@neuling.org> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- arch/powerpc/include/asm/cputable.h | 5 ++++- arch/powerpc/kernel/cputable.c | 20 ++++++++++++++++++++ arch/powerpc/kernel/dt_cpu_ftrs.c | 2 ++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a9bf921f4efc..194dc3006446 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -215,6 +215,7 @@ enum { #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) +#define CPU_FTR_POWER9_DD20 LONG_ASM_CONST(0x8000000000000000) #ifndef __ASSEMBLY__ @@ -477,6 +478,7 @@ enum { CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) +#define CPU_FTRS_POWER9_DD20 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD20) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -495,7 +497,8 @@ enum { (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ - CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1) + CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \ + CPU_FTRS_POWER9_DD1 | 
CPU_FTRS_POWER9_DD20) #endif #else enum { diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 760872916013..171820190de7 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -547,6 +547,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, + { /* Power9 DD2.0 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0200, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD20, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, { /* Power9 */ .pvr_mask = 0xffff0000, .pvr_value = 0x004e0000, diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 7275fed271af..63b9d7edd63f 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -735,6 +735,8 @@ static __init void cpufeatures_cpu_quirks(void) */ if ((version & 0xffffff00) == 0x004e0100) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; + else if ((version & 0xffffefff) == 0x004e0200) + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD20; } static void __init cpufeatures_setup_finished(void) -- 2.13.3 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 2017-10-20 4:54 [PATCH 0/2] Add POWER9 DD2.0 feature, remove idle workarounds in DD2.1 Nicholas Piggin 2017-10-20 4:54 ` [PATCH 1/2] powerpc: add POWER9_DD20 feature Nicholas Piggin @ 2017-10-20 4:54 ` Nicholas Piggin 2017-10-20 11:11 ` Vaidyanathan Srinivasan 1 sibling, 1 reply; 4+ messages in thread From: Nicholas Piggin @ 2017-10-20 4:54 UTC (permalink / raw) To: linuxppc-dev; +Cc: Nicholas Piggin, Vaidyanathan Srinivasan DD2.1 does not have to flush the ERAT after a state-loss idle. It also does not have to save and restore MMCR0. Performance testing was done on a DD2.1 using only the stop0 idle state (the shallowest state which supports state loss), using context_switch selftest configured to ping-pong between two threads on the same core and two different cores. Performance improvement for same core is 7.0%, different cores is 14.8%. Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- arch/powerpc/kernel/idle_book3s.S | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 1125c9be9e06..3531a3e727d2 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -112,12 +112,14 @@ power9_save_additional_sprs: std r4, STOP_HFSCR(r13) mfspr r3, SPRN_MMCRA - mfspr r4, SPRN_MMCR1 + mfspr r4, SPRN_MMCR0 std r3, STOP_MMCRA(r13) - std r4, STOP_MMCR1(r13) + std r4, _MMCR0(r1) - mfspr r3, SPRN_MMCR2 - std r3, STOP_MMCR2(r13) + mfspr r3, SPRN_MMCR1 + mfspr r4, SPRN_MMCR2 + std r3, STOP_MMCR1(r13) + std r4, STOP_MMCR2(r13) blr power9_restore_additional_sprs: @@ -135,11 +137,14 @@ power9_restore_additional_sprs: ld r4, STOP_MMCRA(r13) mtspr SPRN_HFSCR, r3 mtspr SPRN_MMCRA, r4 - /* We have already restored PACA_MMCR0 */ - ld r3, STOP_MMCR1(r13) - ld r4, STOP_MMCR2(r13) - 
mtspr SPRN_MMCR1, r3 - mtspr SPRN_MMCR2, r4 + + ld r3, _MMCR0(r1) + ld r4, STOP_MMCR1(r13) + mtspr SPRN_MMCR0, r3 + mtspr SPRN_MMCR1, r4 + + ld r3, STOP_MMCR2(r13) + mtspr SPRN_MMCR2, r3 blr /* @@ -357,6 +362,7 @@ power_enter_stop: b pnv_wakeup_noloss .Lhandle_esl_ec_set: +BEGIN_FTR_SECTION /* * POWER9 DD2 can incorrectly set PMAO when waking up after a * state-loss idle. Saving and restoring MMCR0 over idle is a @@ -364,6 +370,10 @@ power_enter_stop: */ mfspr r4,SPRN_MMCR0 std r4,_MMCR0(r1) +FTR_SECTION_ELSE + nop + nop +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20) /* * Check if the requested state is a deep idle state. @@ -555,15 +565,22 @@ pnv_restore_hyp_resource_arch300: * then clear bit 60 in MMCRA to ensure the PMU starts running. */ blt cr3,1f +BEGIN_FTR_SECTION + nop + nop + nop + nop +FTR_SECTION_ELSE PPC_INVALIDATE_ERAT ld r1,PACAR1(r13) + ld r4,_MMCR0(r1) + mtspr SPRN_MMCR0,r4 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20) mfspr r4,SPRN_MMCRA ori r4,r4,(1 << (63-60)) mtspr SPRN_MMCRA,r4 xori r4,r4,(1 << (63-60)) mtspr SPRN_MMCRA,r4 - ld r4,_MMCR0(r1) - mtspr SPRN_MMCR0,r4 1: /* * POWER ISA 3. Use PSSCR to determine if we -- 2.13.3 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 2017-10-20 4:54 ` [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 Nicholas Piggin @ 2017-10-20 11:11 ` Vaidyanathan Srinivasan 0 siblings, 0 replies; 4+ messages in thread From: Vaidyanathan Srinivasan @ 2017-10-20 11:11 UTC (permalink / raw) To: Nicholas Piggin; +Cc: linuxppc-dev * Nicholas Piggin <npiggin@gmail.com> [2017-10-20 14:54:44]: > DD2.1 does not have to flush the ERAT after a state-loss idle. It also > does not have to save and restore MMCR0. > > Performance testing was done on a DD2.1 using only the stop0 idle state > (the shallowest state which supports state loss), using context_switch > selftest configured to ping-poing between two threads on the same core ^pong > and two different cores. > > Performance improvement for same core is 7.0%, different cores is 14.8%. Good improvement as we remove unnecessary code and workarounds. > Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> > --- > arch/powerpc/kernel/idle_book3s.S | 39 ++++++++++++++++++++++++++++----------- > 1 file changed, 28 insertions(+), 11 deletions(-) > > diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S > index 1125c9be9e06..3531a3e727d2 100644 > --- a/arch/powerpc/kernel/idle_book3s.S > +++ b/arch/powerpc/kernel/idle_book3s.S > @@ -112,12 +112,14 @@ power9_save_additional_sprs: > std r4, STOP_HFSCR(r13) > > mfspr r3, SPRN_MMCRA > - mfspr r4, SPRN_MMCR1 > + mfspr r4, SPRN_MMCR0 > std r3, STOP_MMCRA(r13) > - std r4, STOP_MMCR1(r13) > + std r4, _MMCR0(r1) > > - mfspr r3, SPRN_MMCR2 > - std r3, STOP_MMCR2(r13) > + mfspr r3, SPRN_MMCR1 > + mfspr r4, SPRN_MMCR2 > + std r3, STOP_MMCR1(r13) > + std r4, STOP_MMCR2(r13) > blr > > power9_restore_additional_sprs: > @@ -135,11 +137,14 @@ 
power9_restore_additional_sprs: > ld r4, STOP_MMCRA(r13) > mtspr SPRN_HFSCR, r3 > mtspr SPRN_MMCRA, r4 > - /* We have already restored PACA_MMCR0 */ > - ld r3, STOP_MMCR1(r13) > - ld r4, STOP_MMCR2(r13) > - mtspr SPRN_MMCR1, r3 > - mtspr SPRN_MMCR2, r4 > + > + ld r3, _MMCR0(r1) > + ld r4, STOP_MMCR1(r13) > + mtspr SPRN_MMCR0, r3 > + mtspr SPRN_MMCR1, r4 > + > + ld r3, STOP_MMCR2(r13) > + mtspr SPRN_MMCR2, r3 > blr > > /* > @@ -357,6 +362,7 @@ power_enter_stop: > b pnv_wakeup_noloss > > .Lhandle_esl_ec_set: > +BEGIN_FTR_SECTION > /* > * POWER9 DD2 can incorrectly set PMAO when waking up after a > * state-loss idle. Saving and restoring MMCR0 over idle is a > @@ -364,6 +370,10 @@ power_enter_stop: > */ > mfspr r4,SPRN_MMCR0 > std r4,_MMCR0(r1) > +FTR_SECTION_ELSE > + nop > + nop > +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20) > > /* > * Check if the requested state is a deep idle state. > @@ -555,15 +565,22 @@ pnv_restore_hyp_resource_arch300: > * then clear bit 60 in MMCRA to ensure the PMU starts running. > */ > blt cr3,1f > +BEGIN_FTR_SECTION > + nop > + nop > + nop > + nop > +FTR_SECTION_ELSE > PPC_INVALIDATE_ERAT > ld r1,PACAR1(r13) > + ld r4,_MMCR0(r1) > + mtspr SPRN_MMCR0,r4 > +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20) > mfspr r4,SPRN_MMCRA > ori r4,r4,(1 << (63-60)) > mtspr SPRN_MMCRA,r4 > xori r4,r4,(1 << (63-60)) > mtspr SPRN_MMCRA,r4 > - ld r4,_MMCR0(r1) > - mtspr SPRN_MMCR0,r4 > 1: > /* > * POWER ISA 3. Use PSSCR to determine if we > -- > 2.13.3 > ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-10-20 11:11 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-10-20 4:54 [PATCH 0/2] Add POWER9 DD2.0 feature, remove idle workarounds in DD2.1 Nicholas Piggin 2017-10-20 4:54 ` [PATCH 1/2] powerpc: add POWER9_DD20 feature Nicholas Piggin 2017-10-20 4:54 ` [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 Nicholas Piggin 2017-10-20 11:11 ` Vaidyanathan Srinivasan
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.