* [PATCH 0/2] Add POWER9 DD2.0 feature, remove idle workarounds in DD2.1
@ 2017-10-20 4:54 Nicholas Piggin
2017-10-20 4:54 ` [PATCH 1/2] powerpc: add POWER9_DD20 feature Nicholas Piggin
2017-10-20 4:54 ` [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 Nicholas Piggin
0 siblings, 2 replies; 4+ messages in thread
From: Nicholas Piggin @ 2017-10-20 4:54 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
Nicholas Piggin (2):
powerpc: add POWER9_DD20 feature
powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on
DD2.1
arch/powerpc/include/asm/cputable.h | 5 ++++-
arch/powerpc/kernel/cputable.c | 20 +++++++++++++++++++
arch/powerpc/kernel/dt_cpu_ftrs.c | 2 ++
arch/powerpc/kernel/idle_book3s.S | 39 ++++++++++++++++++++++++++-----------
4 files changed, 54 insertions(+), 12 deletions(-)
--
2.13.3
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] powerpc: add POWER9_DD20 feature
2017-10-20 4:54 [PATCH 0/2] Add POWER9 DD2.0 feature, remove idle workarounds in DD2.1 Nicholas Piggin
@ 2017-10-20 4:54 ` Nicholas Piggin
2017-10-20 4:54 ` [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 Nicholas Piggin
1 sibling, 0 replies; 4+ messages in thread
From: Nicholas Piggin @ 2017-10-20 4:54 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin, Michael Neuling
Cc: Michael Neuling <mikey@neuling.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/cputable.h | 5 ++++-
arch/powerpc/kernel/cputable.c | 20 ++++++++++++++++++++
arch/powerpc/kernel/dt_cpu_ftrs.c | 2 ++
3 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index a9bf921f4efc..194dc3006446 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -215,6 +215,7 @@ enum {
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
+#define CPU_FTR_POWER9_DD20 LONG_ASM_CONST(0x8000000000000000)
#ifndef __ASSEMBLY__
@@ -477,6 +478,7 @@ enum {
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
(~CPU_FTR_SAO))
+#define CPU_FTRS_POWER9_DD20 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD20)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -495,7 +497,8 @@ enum {
(CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1)
+ CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
+ CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD20)
#endif
#else
enum {
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 760872916013..171820190de7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -547,6 +547,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
+ { /* Power9 DD2.0 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0200,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD20,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .flush_tlb = __flush_tlb_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
{ /* Power9 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 7275fed271af..63b9d7edd63f 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -735,6 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
*/
if ((version & 0xffffff00) == 0x004e0100)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+ else if ((version & 0xffffefff) == 0x004e0200)
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD20;
}
static void __init cpufeatures_setup_finished(void)
--
2.13.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1
2017-10-20 4:54 [PATCH 0/2] Add POWER9 DD2.0 feature, remove idle workarounds in DD2.1 Nicholas Piggin
2017-10-20 4:54 ` [PATCH 1/2] powerpc: add POWER9_DD20 feature Nicholas Piggin
@ 2017-10-20 4:54 ` Nicholas Piggin
2017-10-20 11:11 ` Vaidyanathan Srinivasan
1 sibling, 1 reply; 4+ messages in thread
From: Nicholas Piggin @ 2017-10-20 4:54 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin, Vaidyanathan Srinivasan
DD2.1 does not have to flush the ERAT after a state-loss idle. It also
does not have to save and restore MMCR0.
Performance testing was done on a DD2.1 using only the stop0 idle state
(the shallowest state which supports state loss), using context_switch
selftest configured to ping-pong between two threads on the same core
and two different cores.
Performance improvement for same core is 7.0%, different cores is 14.8%.
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kernel/idle_book3s.S | 39 ++++++++++++++++++++++++++++-----------
1 file changed, 28 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 1125c9be9e06..3531a3e727d2 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -112,12 +112,14 @@ power9_save_additional_sprs:
std r4, STOP_HFSCR(r13)
mfspr r3, SPRN_MMCRA
- mfspr r4, SPRN_MMCR1
+ mfspr r4, SPRN_MMCR0
std r3, STOP_MMCRA(r13)
- std r4, STOP_MMCR1(r13)
+ std r4, _MMCR0(r1)
- mfspr r3, SPRN_MMCR2
- std r3, STOP_MMCR2(r13)
+ mfspr r3, SPRN_MMCR1
+ mfspr r4, SPRN_MMCR2
+ std r3, STOP_MMCR1(r13)
+ std r4, STOP_MMCR2(r13)
blr
power9_restore_additional_sprs:
@@ -135,11 +137,14 @@ power9_restore_additional_sprs:
ld r4, STOP_MMCRA(r13)
mtspr SPRN_HFSCR, r3
mtspr SPRN_MMCRA, r4
- /* We have already restored PACA_MMCR0 */
- ld r3, STOP_MMCR1(r13)
- ld r4, STOP_MMCR2(r13)
- mtspr SPRN_MMCR1, r3
- mtspr SPRN_MMCR2, r4
+
+ ld r3, _MMCR0(r1)
+ ld r4, STOP_MMCR1(r13)
+ mtspr SPRN_MMCR0, r3
+ mtspr SPRN_MMCR1, r4
+
+ ld r3, STOP_MMCR2(r13)
+ mtspr SPRN_MMCR2, r3
blr
/*
@@ -357,6 +362,7 @@ power_enter_stop:
b pnv_wakeup_noloss
.Lhandle_esl_ec_set:
+BEGIN_FTR_SECTION
/*
* POWER9 DD2 can incorrectly set PMAO when waking up after a
* state-loss idle. Saving and restoring MMCR0 over idle is a
@@ -364,6 +370,10 @@ power_enter_stop:
*/
mfspr r4,SPRN_MMCR0
std r4,_MMCR0(r1)
+FTR_SECTION_ELSE
+ nop
+ nop
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20)
/*
* Check if the requested state is a deep idle state.
@@ -555,15 +565,22 @@ pnv_restore_hyp_resource_arch300:
* then clear bit 60 in MMCRA to ensure the PMU starts running.
*/
blt cr3,1f
+BEGIN_FTR_SECTION
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
PPC_INVALIDATE_ERAT
ld r1,PACAR1(r13)
+ ld r4,_MMCR0(r1)
+ mtspr SPRN_MMCR0,r4
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20)
mfspr r4,SPRN_MMCRA
ori r4,r4,(1 << (63-60))
mtspr SPRN_MMCRA,r4
xori r4,r4,(1 << (63-60))
mtspr SPRN_MMCRA,r4
- ld r4,_MMCR0(r1)
- mtspr SPRN_MMCR0,r4
1:
/*
* POWER ISA 3. Use PSSCR to determine if we
--
2.13.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1
2017-10-20 4:54 ` [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 Nicholas Piggin
@ 2017-10-20 11:11 ` Vaidyanathan Srinivasan
0 siblings, 0 replies; 4+ messages in thread
From: Vaidyanathan Srinivasan @ 2017-10-20 11:11 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: linuxppc-dev
* Nicholas Piggin <npiggin@gmail.com> [2017-10-20 14:54:44]:
> DD2.1 does not have to flush the ERAT after a state-loss idle. It also
> does not have to save and restore MMCR0.
>
> Performance testing was done on a DD2.1 using only the stop0 idle state
> (the shallowest state which supports state loss), using context_switch
> selftest configured to ping-poing between two threads on the same core
^pong
> and two different cores.
>
> Performance improvement for same core is 7.0%, different cores is 14.8%.
Good improvement as we remove unnecessary code and workarounds.
> Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
> ---
> arch/powerpc/kernel/idle_book3s.S | 39 ++++++++++++++++++++++++++++-----------
> 1 file changed, 28 insertions(+), 11 deletions(-)
>
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 1125c9be9e06..3531a3e727d2 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -112,12 +112,14 @@ power9_save_additional_sprs:
> std r4, STOP_HFSCR(r13)
>
> mfspr r3, SPRN_MMCRA
> - mfspr r4, SPRN_MMCR1
> + mfspr r4, SPRN_MMCR0
> std r3, STOP_MMCRA(r13)
> - std r4, STOP_MMCR1(r13)
> + std r4, _MMCR0(r1)
>
> - mfspr r3, SPRN_MMCR2
> - std r3, STOP_MMCR2(r13)
> + mfspr r3, SPRN_MMCR1
> + mfspr r4, SPRN_MMCR2
> + std r3, STOP_MMCR1(r13)
> + std r4, STOP_MMCR2(r13)
> blr
>
> power9_restore_additional_sprs:
> @@ -135,11 +137,14 @@ power9_restore_additional_sprs:
> ld r4, STOP_MMCRA(r13)
> mtspr SPRN_HFSCR, r3
> mtspr SPRN_MMCRA, r4
> - /* We have already restored PACA_MMCR0 */
> - ld r3, STOP_MMCR1(r13)
> - ld r4, STOP_MMCR2(r13)
> - mtspr SPRN_MMCR1, r3
> - mtspr SPRN_MMCR2, r4
> +
> + ld r3, _MMCR0(r1)
> + ld r4, STOP_MMCR1(r13)
> + mtspr SPRN_MMCR0, r3
> + mtspr SPRN_MMCR1, r4
> +
> + ld r3, STOP_MMCR2(r13)
> + mtspr SPRN_MMCR2, r3
> blr
>
> /*
> @@ -357,6 +362,7 @@ power_enter_stop:
> b pnv_wakeup_noloss
>
> .Lhandle_esl_ec_set:
> +BEGIN_FTR_SECTION
> /*
> * POWER9 DD2 can incorrectly set PMAO when waking up after a
> * state-loss idle. Saving and restoring MMCR0 over idle is a
> @@ -364,6 +370,10 @@ power_enter_stop:
> */
> mfspr r4,SPRN_MMCR0
> std r4,_MMCR0(r1)
> +FTR_SECTION_ELSE
> + nop
> + nop
> +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20)
>
> /*
> * Check if the requested state is a deep idle state.
> @@ -555,15 +565,22 @@ pnv_restore_hyp_resource_arch300:
> * then clear bit 60 in MMCRA to ensure the PMU starts running.
> */
> blt cr3,1f
> +BEGIN_FTR_SECTION
> + nop
> + nop
> + nop
> + nop
> +FTR_SECTION_ELSE
> PPC_INVALIDATE_ERAT
> ld r1,PACAR1(r13)
> + ld r4,_MMCR0(r1)
> + mtspr SPRN_MMCR0,r4
> +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POWER9_DD1 | CPU_FTR_POWER9_DD20)
> mfspr r4,SPRN_MMCRA
> ori r4,r4,(1 << (63-60))
> mtspr SPRN_MMCRA,r4
> xori r4,r4,(1 << (63-60))
> mtspr SPRN_MMCRA,r4
> - ld r4,_MMCR0(r1)
> - mtspr SPRN_MMCR0,r4
> 1:
> /*
> * POWER ISA 3. Use PSSCR to determine if we
> --
> 2.13.3
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-10-20 11:11 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-20 4:54 [PATCH 0/2] Add POWER9 DD2.0 feature, remove idle workarounds in DD2.1 Nicholas Piggin
2017-10-20 4:54 ` [PATCH 1/2] powerpc: add POWER9_DD20 feature Nicholas Piggin
2017-10-20 4:54 ` [PATCH 2/2] powerpc/64s: idle skip POWER9 DD1 and DD2.0 specific workarounds on DD2.1 Nicholas Piggin
2017-10-20 11:11 ` Vaidyanathan Srinivasan
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.