All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 0/5] power saving wakeup changes for POWER9 MCE
@ 2017-02-16 18:38 Nicholas Piggin
  2017-02-16 18:38 ` [PATCH 1/5] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Nicholas Piggin @ 2017-02-16 18:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Hi,

This is not a bare minimum fix to get the MCE power saving wakeup
working for POWER9, but I had a couple of other things it ended on
top of which made things a bit simpler.

RFC only at this stage because it needs a bit more testing. It
seems to work in simulator though when constructing an MCE from
power save wakeup (whereas it crashes with mainline).

Thanks,
Nick

Nicholas Piggin (5):
  powerpc/64s: move remaining system reset idle code into idle_book3s.S
  powerpc/64: stop using bit in HSPRG0 to test winkle
  powerpc/64s: use alternative feature patching
  powerpc/64s: consolidate pnv_restore_hyp_resource into
    pnv_powersave_wakeup
  powerpc/64s: fix POWER9 machine check handler from stop state

 arch/powerpc/include/asm/exception-64s.h |  18 +----
 arch/powerpc/kernel/exceptions-64s.S     | 116 +++++++++++--------------------
 arch/powerpc/kernel/idle_book3s.S        |  76 ++++++++++----------
 arch/powerpc/platforms/powernv/idle.c    |  13 ----
 4 files changed, 81 insertions(+), 142 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/5] powerpc/64s: move remaining system reset idle code into idle_book3s.S
  2017-02-16 18:38 [RFC][PATCH 0/5] power saving wakeup changes for POWER9 MCE Nicholas Piggin
@ 2017-02-16 18:38 ` Nicholas Piggin
  2017-02-28 11:40   ` Gautham R Shenoy
  2017-02-16 18:38 ` [PATCH 2/5] powerpc/64: stop using bit in HSPRG0 to test winkle Nicholas Piggin
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2017-02-16 18:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Should be no functional change.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 26 +-----------------------
 arch/powerpc/kernel/idle_book3s.S    | 39 +++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0e1350b30160..d0d89047befe 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -130,31 +130,7 @@ EXC_VIRT_NONE(0x4100, 0x4200)
 
 #ifdef CONFIG_PPC_P7_NAP
 EXC_COMMON_BEGIN(system_reset_idle_common)
-BEGIN_FTR_SECTION
-	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-	bl	pnv_restore_hyp_resource
-
-	li	r0,PNV_THREAD_RUNNING
-	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	b	kvm_start_guest
-1:
-#endif
-
-	/* Return SRR1 from power7_nap() */
-	mfspr	r3,SPRN_SRR1
-	blt	cr3,2f
-	b	pnv_wakeup_loss
-2:	b	pnv_wakeup_noloss
+	b	pnv_powersave_wakeup
 #endif
 
 EXC_COMMON_BEGIN(system_reset_common)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 72dac0b58061..ea3562f83c57 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -115,7 +115,7 @@ core_idle_lock_held:
  *
  * Address to 'rfid' to in r5
  */
-_GLOBAL(pnv_powersave_common)
+pnv_powersave_common:
 	/* Use r3 to pass state nap/sleep/winkle */
 	/* NAP is a state loss, we create a regs frame on the
 	 * stack, fill it up with the state we care about and
@@ -365,6 +365,34 @@ _GLOBAL(power9_idle_stop)
 	LOAD_REG_ADDR(r5,power_enter_stop)
 	b	pnv_powersave_common
 	/* No return */
+
+.global pnv_powersave_wakeup
+pnv_powersave_wakeup:
+BEGIN_FTR_SECTION
+	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+	bl	pnv_restore_hyp_resource
+
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beq	1f
+	b	kvm_start_guest
+1:
+#endif
+
+	/* Return SRR1 from power7_nap() */
+	mfspr	r3,SPRN_SRR1
+	blt	cr3,pnv_wakeup_noloss
+	b	pnv_wakeup_loss
+
 /*
  * Called from reset vector. Check whether we have woken up with
  * hypervisor state loss. If yes, restore hypervisor state and return
@@ -373,7 +401,7 @@ _GLOBAL(power9_idle_stop)
  * r13 - Contents of HSPRG0
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
-_GLOBAL(pnv_restore_hyp_resource)
+pnv_restore_hyp_resource:
 BEGIN_FTR_SECTION
 	ld	r2,PACATOC(r13);
 	/*
@@ -436,7 +464,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  * cr3 - gt if waking up with partial/complete hypervisor state loss
  * cr4 - gt or eq if waking up from complete hypervisor state loss.
  */
-_GLOBAL(pnv_wakeup_tb_loss)
+pnv_wakeup_tb_loss:
 	ld	r1,PACAR1(r13)
 	/*
 	 * Before entering any idle state, the NVGPRs are saved in the stack
@@ -640,7 +668,8 @@ fastsleep_workaround_at_exit:
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
  */
-_GLOBAL(pnv_wakeup_loss)
+.global pnv_wakeup_loss
+pnv_wakeup_loss:
 	ld	r1,PACAR1(r13)
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
@@ -660,7 +689,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
  */
-_GLOBAL(pnv_wakeup_noloss)
+pnv_wakeup_noloss:
 	lbz	r0,PACA_NAPSTATELOST(r13)
 	cmpwi	r0,0
 	bne	pnv_wakeup_loss
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/5] powerpc/64: stop using bit in HSPRG0 to test winkle
  2017-02-16 18:38 [RFC][PATCH 0/5] power saving wakeup changes for POWER9 MCE Nicholas Piggin
  2017-02-16 18:38 ` [PATCH 1/5] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
@ 2017-02-16 18:38 ` Nicholas Piggin
  2017-02-28 15:08   ` Gautham R Shenoy
  2017-02-16 18:38 ` [PATCH 3/5] powerpc/64s: use alternative feature patching Nicholas Piggin
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2017-02-16 18:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

The POWER8 idle code has a neat trick of programming the power on engine
to restore a low bit into HSPRG0, so idle wakeup code can test and see
if it has been programmed this way and therefore lost all state. Restore
time can be reduced if winkle has not been reached.

However this messes with our r13 PACA pointer, and requires HSPRG0 to
be written to. It also optimizes the slowest and most uncommon case at
the expense of another SPR write in the common nap state wakeup.

Remove this complexity and assume winkle sleeps always require a state
restore. This speedup could be made entirely contained within the winkle
idle code by counting per-core winkles and setting a thread bitmap when
all have gone to winkle.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/exception-64s.h | 18 ++----------------
 arch/powerpc/kernel/exceptions-64s.S     | 23 ++++-------------------
 arch/powerpc/kernel/idle_book3s.S        | 30 ++++++++++--------------------
 arch/powerpc/platforms/powernv/idle.c    | 13 -------------
 4 files changed, 16 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 8ad1cf5c4edf..8f7154a4234f 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -158,17 +158,14 @@ BEGIN_FTR_SECTION_NESTED(943)						\
 	std	ra,offset(r13);						\
 END_FTR_SECTION_NESTED(ftr,ftr,943)
 
-#define EXCEPTION_PROLOG_0_PACA(area)					\
+#define EXCEPTION_PROLOG_0(area)					\
+	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 */			\
 	OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);			\
 	HMT_MEDIUM;							\
 	std	r10,area+EX_R10(r13);	/* save r10 - r12 */		\
 	OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
 
-#define EXCEPTION_PROLOG_0(area)					\
-	GET_PACA(r13);							\
-	EXCEPTION_PROLOG_0_PACA(area)
-
 #define __EXCEPTION_PROLOG_1(area, extra, vec)				\
 	OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);		\
 	OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);		\
@@ -220,17 +217,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	EXCEPTION_PROLOG_1(area, extra, vec);				\
 	EXCEPTION_PROLOG_PSERIES_1_NORI(label, h);
 
-/* Have the PACA in r13 already */
-#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec)	\
-	EXCEPTION_PROLOG_0_PACA(area);					\
-	EXCEPTION_PROLOG_1(area, extra, vec);				\
-	EXCEPTION_PROLOG_PSERIES_1(label, h);
-
-#define EXCEPTION_PROLOG_PSERIES_PACA_NORI(area, label, h, extra, vec)	\
-	EXCEPTION_PROLOG_0_PACA(area);					\
-	EXCEPTION_PROLOG_1(area, extra, vec);				\
-	EXCEPTION_PROLOG_PSERIES_1_NORI(label, h);
-
 #define __KVMTEST(h, n)							\
 	lbz	r10,HSTATE_IN_GUEST(r13);				\
 	cmpwi	r10,0;							\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d0d89047befe..5f775783f744 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -116,14 +116,12 @@ EXC_VIRT_NONE(0x4000, 0x4100)
 
 EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
 	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	clrrdi	r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
 	/*
 	 * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
 	 * being used, so a nested NMI exception would corrupt it.
 	 */
-	EXCEPTION_PROLOG_PSERIES_PACA_NORI(PACA_EXNMI, system_reset_common,
-						EXC_STD, IDLETEST, 0x100)
+	EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common,
+					EXC_STD, IDLETEST, 0x100)
 
 EXC_REAL_END(system_reset, 0x100, 0x200)
 EXC_VIRT_NONE(0x4100, 0x4200)
@@ -180,14 +178,6 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x300)
 	 * vector
 	 */
 	SET_SCRATCH0(r13)		/* save r13 */
-	/*
-	 * Running native on arch 2.06 or later, we may wakeup from winkle
-	 * inside machine check. If yes, then last bit of HSPRG0 would be set
-	 * to 1. Hence clear it unconditionally.
-	 */
-	GET_PACA(r13)
-	clrrdi	r13,r13,1
-	SET_PACA(r13)
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
 	b	machine_check_powernv_early
@@ -362,7 +352,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
 	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn;t */
+	beq	4f			/* No, it wasn't */
 	/* Thread was in power saving mode. Go back to nap again. */
 	cmpwi	r11,2
 	blt	3f
@@ -392,13 +382,8 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	/*
 	 * Go back to winkle. Please note that this thread was woken up in
 	 * machine check from winkle and have not restored the per-subcore
-	 * state. Hence before going back to winkle, set last bit of HSPRG0
-	 * to 1. This will make sure that if this thread gets woken up
-	 * again at reset vector 0x100 then it will get chance to restore
-	 * the subcore state.
+	 * state.
 	 */
-	ori	r13,r13,1
-	SET_PACA(r13)
 	IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
 	/* No return */
 4:
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index ea3562f83c57..1271344e5523 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -366,11 +366,12 @@ _GLOBAL(power9_idle_stop)
 	b	pnv_powersave_common
 	/* No return */
 
+/*
+ * Called from reset vector for powersave wakeups.
+ * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ */
 .global pnv_powersave_wakeup
 pnv_powersave_wakeup:
-BEGIN_FTR_SECTION
-	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	bl	pnv_restore_hyp_resource
 
 	li	r0,PNV_THREAD_RUNNING
@@ -394,16 +395,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	b	pnv_wakeup_loss
 
 /*
- * Called from reset vector. Check whether we have woken up with
- * hypervisor state loss. If yes, restore hypervisor state and return
- * back to reset vector.
- *
- * r13 - Contents of HSPRG0
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ * Check whether we have woken up with hypervisor state loss. If yes,
+ * restore hypervisor state and return back to reset vector.
  */
 pnv_restore_hyp_resource:
-BEGIN_FTR_SECTION
 	ld	r2,PACATOC(r13);
+
+BEGIN_FTR_SECTION
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
 	 * are waking up from deep idle state
@@ -429,19 +427,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	/*
 	 * POWER ISA 2.07 or less.
-	 * Check if last bit of HSPGR0 is set. This indicates whether we are
-	 * waking up from winkle.
+	 * Check if we slept with winkle.
 	 */
-	clrldi	r5,r13,63
-	clrrdi	r13,r13,1
-
-	/* Now that we are sure r13 is corrected, load TOC */
-	ld	r2,PACATOC(r13);
-	cmpwi	cr4,r5,1
-	mtspr	SPRN_HSPRG0,r13
-
 	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
 	cmpwi   cr2,r0,PNV_THREAD_NAP
+	cmpwi   cr4,r0,PNV_THREAD_WINKLE
 	bgt     cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
 
 	/*
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 479c25601612..59dc81a58444 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -53,19 +53,6 @@ static int pnv_save_sprs_for_deep_states(void)
 		uint64_t pir = get_hard_smp_processor_id(cpu);
 		uint64_t hsprg0_val = (uint64_t)&paca[cpu];
 
-		if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
-			/*
-			 * HSPRG0 is used to store the cpu's pointer to paca.
-			 * Hence last 3 bits are guaranteed to be 0. Program
-			 * slw to restore HSPRG0 with 63rd bit set, so that
-			 * when a thread wakes up at 0x100 we can use this bit
-			 * to distinguish between fastsleep and deep winkle.
-			 * This is not necessary with stop/psscr since PLS
-			 * field of psscr indicates which state we are waking
-			 * up from.
-			 */
-			hsprg0_val |= 1;
-		}
 		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
 		if (rc != 0)
 			return rc;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/5] powerpc/64s: use alternative feature patching
  2017-02-16 18:38 [RFC][PATCH 0/5] power saving wakeup changes for POWER9 MCE Nicholas Piggin
  2017-02-16 18:38 ` [PATCH 1/5] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
  2017-02-16 18:38 ` [PATCH 2/5] powerpc/64: stop using bit in HSPRG0 to test winkle Nicholas Piggin
@ 2017-02-16 18:38 ` Nicholas Piggin
  2017-02-28 15:12   ` Gautham R Shenoy
  2017-02-16 18:39 ` [PATCH 4/5] powerpc/64s: consolidate pnv_restore_hyp_resource into pnv_powersave_wakeup Nicholas Piggin
  2017-02-16 18:39 ` [PATCH 5/5] powerpc/64s: fix POWER9 machine check handler from stop state Nicholas Piggin
  4 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2017-02-16 18:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

This reduces the number of nops for POWER8

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/idle_book3s.S | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 1271344e5523..ab15dee371c9 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -417,13 +417,8 @@ BEGIN_FTR_SECTION
 	rldicl  r5,r5,4,60
 	cmpd	cr4,r5,r4
 	bge	cr4,pnv_wakeup_tb_loss
-	/*
-	 * Waking up without hypervisor state loss. Return to
-	 * reset vector
-	 */
-	blr
 
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+FTR_SECTION_ELSE
 
 	/*
 	 * POWER ISA 2.07 or less.
@@ -440,9 +435,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	 * indicates we are waking with hypervisor state loss from nap.
 	 */
 	bgt	cr3,.
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 
-	blr	/* Return back to System Reset vector from where
-		   pnv_restore_hyp_resource was invoked */
+	/*
+	 * Waking up without hypervisor state loss. Return to
+	 * reset vector
+	 */
+	blr
 
 /*
  * Called if waking up from idle state which can cause either partial or
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/5] powerpc/64s: consolidate pnv_restore_hyp_resource into pnv_powersave_wakeup
  2017-02-16 18:38 [RFC][PATCH 0/5] power saving wakeup changes for POWER9 MCE Nicholas Piggin
                   ` (2 preceding siblings ...)
  2017-02-16 18:38 ` [PATCH 3/5] powerpc/64s: use alternative feature patching Nicholas Piggin
@ 2017-02-16 18:39 ` Nicholas Piggin
  2017-02-28 15:26   ` Gautham R Shenoy
  2017-02-16 18:39 ` [PATCH 5/5] powerpc/64s: fix POWER9 machine check handler from stop state Nicholas Piggin
  4 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2017-02-16 18:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

There is only one caller, so this reduces spaghetti of subsequent
callees returning into the caller.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/idle_book3s.S | 66 +++++++++++++++------------------------
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index ab15dee371c9..58364a834f87 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -372,33 +372,6 @@ _GLOBAL(power9_idle_stop)
  */
 .global pnv_powersave_wakeup
 pnv_powersave_wakeup:
-	bl	pnv_restore_hyp_resource
-
-	li	r0,PNV_THREAD_RUNNING
-	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	b	kvm_start_guest
-1:
-#endif
-
-	/* Return SRR1 from power7_nap() */
-	mfspr	r3,SPRN_SRR1
-	blt	cr3,pnv_wakeup_noloss
-	b	pnv_wakeup_loss
-
-/*
- * Check whether we have woken up with hypervisor state loss. If yes,
- * restore hypervisor state and return back to reset vector.
- */
-pnv_restore_hyp_resource:
 	ld	r2,PACATOC(r13);
 
 BEGIN_FTR_SECTION
@@ -416,7 +389,7 @@ BEGIN_FTR_SECTION
 	 */
 	rldicl  r5,r5,4,60
 	cmpd	cr4,r5,r4
-	bge	cr4,pnv_wakeup_tb_loss
+	bgel	cr4,pnv_wakeup_tb_loss
 
 FTR_SECTION_ELSE
 
@@ -425,23 +398,36 @@ FTR_SECTION_ELSE
 	 * Check if we slept with winkle.
 	 */
 	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi   cr2,r0,PNV_THREAD_NAP
-	cmpwi   cr4,r0,PNV_THREAD_WINKLE
-	bgt     cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
+	cmpwi	cr2,r0,PNV_THREAD_NAP
+	cmpwi	cr4,r0,PNV_THREAD_WINKLE
+	bgtl	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
 
-	/*
-	 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
-	 * up from nap. At this stage CR3 shouldn't contains 'gt' since that
-	 * indicates we are waking with hypervisor state loss from nap.
-	 */
-	bgt	cr3,.
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 
 	/*
-	 * Waking up without hypervisor state loss. Return to
-	 * reset vector
+	 * Waking up without hypervisor state loss, or continuing after
+	 * hypervisor state has been restored.
 	 */
-	blr
+
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beq	1f
+	b	kvm_start_guest
+1:
+#endif
+
+	/* Return SRR1 from power7_nap() */
+	mfspr	r3,SPRN_SRR1
+	blt	cr3,pnv_wakeup_noloss
+	b	pnv_wakeup_loss
 
 /*
  * Called if waking up from idle state which can cause either partial or
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/5] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-02-16 18:38 [RFC][PATCH 0/5] power saving wakeup changes for POWER9 MCE Nicholas Piggin
                   ` (3 preceding siblings ...)
  2017-02-16 18:39 ` [PATCH 4/5] powerpc/64s: consolidate pnv_restore_hyp_resource into pnv_powersave_wakeup Nicholas Piggin
@ 2017-02-16 18:39 ` Nicholas Piggin
  2017-02-28 17:15   ` Gautham R Shenoy
  4 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2017-02-16 18:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

The ISA specifies power save wakeup can cause a machine check interrupt.
The machine check handler currently has code to handle that for POWER8,
but POWER9 crashes when trying to execute the P8 style sleep
instructions.

So queue up the machine check, then call into the idle code to wake up
as the system reset interrupt does, rather than attempting to sleep
again without going through the main idle path.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 71 ++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 5f775783f744..0388843c8d12 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -329,6 +329,35 @@ EXC_COMMON_BEGIN(machine_check_common)
 	/* restore original r1. */			\
 	ld	r1,GPR1(r1)
 
+#ifdef CONFIG_PPC_P7_NAP
+EXC_COMMON_BEGIN(machine_check_idle_common)
+	bl	machine_check_queue_event
+	/*
+	 * Queue the machine check, then reload SRR1 and use it to set
+	 * CR3 according to pnv_powersave_wakeup convention.
+	 */
+	ld	r12,_MSR(r1)
+	rlwinm	r11,r12,47-31,30,31
+	cmpwi	cr3,r11,2
+
+	/*
+	 * Now have to make SRR1 wake up reason look like a system reset
+	 * interrupt. Put 0xf in there, which is reserved (and does not
+	 * match HMI).
+	 */
+	li	r11,0xf
+	insrdi	r12,r11,4,45
+	mtspr	r12,SPRN_SRR1
+	std	r12,_MSR(r1)
+
+	/*
+	 * Decrement MCE nesting after finishing with the stack.
+	 */
+	lhz	r11,PACA_IN_MCE(r13)
+	subi	r11,r11,1
+	sth	r11,PACA_IN_MCE(r13)
+	b	pnv_powersave_wakeup
+#endif
 	/*
 	 * Handle machine check early in real mode. We come here with
 	 * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
@@ -341,6 +370,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
+
 #ifdef	CONFIG_PPC_P7_NAP
 	/*
 	 * Check if thread was in power saving mode. We come here when any
@@ -351,43 +381,14 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 *
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
-	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn't */
-	/* Thread was in power saving mode. Go back to nap again. */
-	cmpwi	r11,2
-	blt	3f
-	/* Supervisor/Hypervisor state loss */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-3:	bl	machine_check_queue_event
-	MACHINE_CHECK_HANDLER_WINDUP
-	GET_PACA(r13)
-	ld	r1,PACAR1(r13)
-	/*
-	 * Check what idle state this CPU was in and go back to same mode
-	 * again.
-	 */
-	lbz	r3,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	r3,PNV_THREAD_NAP
-	bgt	10f
-	IDLE_STATE_ENTER_SEQ(PPC_NAP)
-	/* No return */
-10:
-	cmpwi	r3,PNV_THREAD_SLEEP
-	bgt	2f
-	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
-	/* No return */
-
-2:
-	/*
-	 * Go back to winkle. Please note that this thread was woken up in
-	 * machine check from winkle and have not restored the per-subcore
-	 * state.
-	 */
-	IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
-	/* No return */
+	BEGIN_FTR_SECTION
+	rlwinm.	r11,r12,47-31,30,31
+	beq-	4f
+	BRANCH_TO_COMMON(r10, machine_check_idle_common)
 4:
+	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
+
 	/*
 	 * Check if we are coming from hypervisor userspace. If yes then we
 	 * continue in host kernel in V mode to deliver the MC event.
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/5] powerpc/64s: move remaining system reset idle code into idle_book3s.S
  2017-02-16 18:38 ` [PATCH 1/5] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
@ 2017-02-28 11:40   ` Gautham R Shenoy
  0 siblings, 0 replies; 14+ messages in thread
From: Gautham R Shenoy @ 2017-02-28 11:40 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, ego

Hi Nick,

This patch is fine by me.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

On Fri, Feb 17, 2017 at 12:08 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> Should be no functional change.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kernel/exceptions-64s.S | 26 +-----------------------
>  arch/powerpc/kernel/idle_book3s.S    | 39 +++++++++++++++++++++++++++++++-----
>  2 files changed, 35 insertions(+), 30 deletions(-)
>
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 0e1350b30160..d0d89047befe 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -130,31 +130,7 @@ EXC_VIRT_NONE(0x4100, 0x4200)
>
>  #ifdef CONFIG_PPC_P7_NAP
>  EXC_COMMON_BEGIN(system_reset_idle_common)
> -BEGIN_FTR_SECTION
> -       GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
> -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> -       bl      pnv_restore_hyp_resource
> -
> -       li      r0,PNV_THREAD_RUNNING
> -       stb     r0,PACA_THREAD_IDLE_STATE(r13)  /* Clear thread state */
> -
> -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> -       li      r0,KVM_HWTHREAD_IN_KERNEL
> -       stb     r0,HSTATE_HWTHREAD_STATE(r13)
> -       /* Order setting hwthread_state vs. testing hwthread_req */
> -       sync
> -       lbz     r0,HSTATE_HWTHREAD_REQ(r13)
> -       cmpwi   r0,0
> -       beq     1f
> -       b       kvm_start_guest
> -1:
> -#endif
> -
> -       /* Return SRR1 from power7_nap() */
> -       mfspr   r3,SPRN_SRR1
> -       blt     cr3,2f
> -       b       pnv_wakeup_loss
> -2:     b       pnv_wakeup_noloss
> +       b       pnv_powersave_wakeup
>  #endif
>
>  EXC_COMMON_BEGIN(system_reset_common)
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 72dac0b58061..ea3562f83c57 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -115,7 +115,7 @@ core_idle_lock_held:
>   *
>   * Address to 'rfid' to in r5
>   */
> -_GLOBAL(pnv_powersave_common)
> +pnv_powersave_common:
>         /* Use r3 to pass state nap/sleep/winkle */
>         /* NAP is a state loss, we create a regs frame on the
>          * stack, fill it up with the state we care about and
> @@ -365,6 +365,34 @@ _GLOBAL(power9_idle_stop)
>         LOAD_REG_ADDR(r5,power_enter_stop)
>         b       pnv_powersave_common
>         /* No return */
> +
> +.global pnv_powersave_wakeup
> +pnv_powersave_wakeup:
> +BEGIN_FTR_SECTION
> +       GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
> +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> +       bl      pnv_restore_hyp_resource
> +
> +       li      r0,PNV_THREAD_RUNNING
> +       stb     r0,PACA_THREAD_IDLE_STATE(r13)  /* Clear thread state */
> +
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +       li      r0,KVM_HWTHREAD_IN_KERNEL
> +       stb     r0,HSTATE_HWTHREAD_STATE(r13)
> +       /* Order setting hwthread_state vs. testing hwthread_req */
> +       sync
> +       lbz     r0,HSTATE_HWTHREAD_REQ(r13)
> +       cmpwi   r0,0
> +       beq     1f
> +       b       kvm_start_guest
> +1:
> +#endif
> +
> +       /* Return SRR1 from power7_nap() */
> +       mfspr   r3,SPRN_SRR1
> +       blt     cr3,pnv_wakeup_noloss
> +       b       pnv_wakeup_loss
> +
>  /*
>   * Called from reset vector. Check whether we have woken up with
>   * hypervisor state loss. If yes, restore hypervisor state and return
> @@ -373,7 +401,7 @@ _GLOBAL(power9_idle_stop)
>   * r13 - Contents of HSPRG0
>   * cr3 - set to gt if waking up with partial/complete hypervisor state loss
>   */
> -_GLOBAL(pnv_restore_hyp_resource)
> +pnv_restore_hyp_resource:
>  BEGIN_FTR_SECTION
>         ld      r2,PACATOC(r13);
>         /*
> @@ -436,7 +464,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
>   * cr3 - gt if waking up with partial/complete hypervisor state loss
>   * cr4 - gt or eq if waking up from complete hypervisor state loss.
>   */
> -_GLOBAL(pnv_wakeup_tb_loss)
> +pnv_wakeup_tb_loss:
>         ld      r1,PACAR1(r13)
>         /*
>          * Before entering any idle state, the NVGPRs are saved in the stack
> @@ -640,7 +668,8 @@ fastsleep_workaround_at_exit:
>   * R3 here contains the value that will be returned to the caller
>   * of power7_nap.
>   */
> -_GLOBAL(pnv_wakeup_loss)
> +.global pnv_wakeup_loss
> +pnv_wakeup_loss:
>         ld      r1,PACAR1(r13)
>  BEGIN_FTR_SECTION
>         CHECK_HMI_INTERRUPT
> @@ -660,7 +689,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>   * R3 here contains the value that will be returned to the caller
>   * of power7_nap.
>   */
> -_GLOBAL(pnv_wakeup_noloss)
> +pnv_wakeup_noloss:
>         lbz     r0,PACA_NAPSTATELOST(r13)
>         cmpwi   r0,0
>         bne     pnv_wakeup_loss
> --
> 2.11.0
>



-- 
Thanks and Regards
gautham.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/5] powerpc/64: stop using bit in HSPRG0 to test winkle
  2017-02-16 18:38 ` [PATCH 2/5] powerpc/64: stop using bit in HSPRG0 to test winkle Nicholas Piggin
@ 2017-02-28 15:08   ` Gautham R Shenoy
  2017-02-28 15:36     ` Nicholas Piggin
  0 siblings, 1 reply; 14+ messages in thread
From: Gautham R Shenoy @ 2017-02-28 15:08 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, ego, Vaidyanathan Srinivasan

Hi Nick,

On Fri, Feb 17, 2017 at 12:08 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> The POWER8 idle code has a neat trick of programming the power on engine
> to restore a low bit into HSPRG0, so idle wakeup code can test and see
> if it has been programmed this way and therefore lost all state. Restore
> time can be reduced if winkle has not been reached.
>

> However this messes with our r13 PACA pointer, and requires HSPRG0 to
> be written to. It also optimizes the slowest and most uncommon case at
> the expense of another SPR write in the common nap state wakeup.

Actually, this optimization was needed to reduce the guest entry time
for a KVM guest vcore.

While running KVM on POWER8, the host is in SMT=1 mode with the
secondary threads
being hotplugged out and supposed to have entered the winkle state.
Since the primary thread is still running, the core would still be in Nap.

However, each time a guest vcore is scheduled on the core, the
secondary threads are sent an IPI to wake them up from the idle state.

On waking up, they use the last bit of HSPRG0 and conclude that they
don't need to restore resources before executing the guest entry code.

Thus, the HSPRG0 trick help the secondary threads avoid spending
extra cycles restoring the SLBs and per-thread SPRs.



>
> Remove this complexity and assume winkle sleeps always require a state
> restore. This speedup could be made entirely contained within the winkle
> idle code by counting per-core winkles and setting a thread bitmap when
> all have gone to winkle.

This is a good idea! We are anyway taking the lock in pnv_wakeup_tb_loss()
to check if we are the first thread in the core waking up from a deep-state.

We can check another bitmap if we are the first thread waking up from winkle
and set cr4 to the result of that comparison while holding the lock.

That shouldn't cost us much, at least for the fake-wakeup-from-winkle
for KVM case alluded to above.

Otherwise, the patch looks good to me.


>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

--
Thanks and Regards
gautham.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/5] powerpc/64s: use alternative feature patching
  2017-02-16 18:38 ` [PATCH 3/5] powerpc/64s: use alternative feature patching Nicholas Piggin
@ 2017-02-28 15:12   ` Gautham R Shenoy
  0 siblings, 0 replies; 14+ messages in thread
From: Gautham R Shenoy @ 2017-02-28 15:12 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, ego, Vaidyanathan Srinivasan

Hi Nick,

On Fri, Feb 17, 2017 at 12:08 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> This reduces the number of nops for POWER8
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

This change looks ok to me.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

> ---
>  arch/powerpc/kernel/idle_book3s.S | 15 +++++++--------
>  1 file changed, 7 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 1271344e5523..ab15dee371c9 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -417,13 +417,8 @@ BEGIN_FTR_SECTION
>         rldicl  r5,r5,4,60
>         cmpd    cr4,r5,r4
>         bge     cr4,pnv_wakeup_tb_loss
> -       /*
> -        * Waking up without hypervisor state loss. Return to
> -        * reset vector
> -        */
> -       blr
>
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
> +FTR_SECTION_ELSE
>
>         /*
>          * POWER ISA 2.07 or less.
> @@ -440,9 +435,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
>          * indicates we are waking with hypervisor state loss from nap.
>          */
>         bgt     cr3,.
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
>
> -       blr     /* Return back to System Reset vector from where
> -                  pnv_restore_hyp_resource was invoked */
> +       /*
> +        * Waking up without hypervisor state loss. Return to
> +        * reset vector
> +        */
> +       blr
>
>  /*
>   * Called if waking up from idle state which can cause either partial or
> --
> 2.11.0
>



-- 
Thanks and Regards
gautham.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/5] powerpc/64s: consolidate pnv_restore_hyp_resource into pnv_powersave_wakeup
  2017-02-16 18:39 ` [PATCH 4/5] powerpc/64s: consolidate pnv_restore_hyp_resource into pnv_powersave_wakeup Nicholas Piggin
@ 2017-02-28 15:26   ` Gautham R Shenoy
  0 siblings, 0 replies; 14+ messages in thread
From: Gautham R Shenoy @ 2017-02-28 15:26 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, ego, Vaidyanathan Srinivasan

Hi Nick,

On Fri, Feb 17, 2017 at 12:09 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> There is only one caller, so this reduces spaghetti of subsequent
> callees returning into the caller.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

This patch is good!

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

--
Thanks and Regards
gautham.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/5] powerpc/64: stop using bit in HSPRG0 to test winkle
  2017-02-28 15:08   ` Gautham R Shenoy
@ 2017-02-28 15:36     ` Nicholas Piggin
  0 siblings, 0 replies; 14+ messages in thread
From: Nicholas Piggin @ 2017-02-28 15:36 UTC (permalink / raw)
  To: Gautham R Shenoy; +Cc: ego, linuxppc-dev, Vaidyanathan Srinivasan

On Tue, 28 Feb 2017 20:38:19 +0530
Gautham R Shenoy <ego.lkml@gmail.com> wrote:

> Hi Nick,
> 
> On Fri, Feb 17, 2017 at 12:08 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> > The POWER8 idle code has a neat trick of programming the power on engine
> > to restore a low bit into HSPRG0, so idle wakeup code can test and see
> > if it has been programmed this way and therefore lost all state. Restore
> > time can be reduced if winkle has not been reached.
> >  
> 
> > However this messes with our r13 PACA pointer, and requires HSPRG0 to
> > be written to. It also optimizes the slowest and most uncommon case at
> > the expense of another SPR write in the common nap state wakeup.  
> 
> Actually, this optimization was needed to reduce the guest entry time
> for a KVM guest vcore.
> 
> While running KVM on POWER8, the host is in SMT=1 mode with the
> secondary threads
> being hotplugged out and supposed to have entered the winkle state.
> Since the primary thread is still running, the core would still be in Nap.
> 
> However, each time a guest vcore is scheduled on the core, the
> secondary threads are sent an IPI to wake them up from the idle state.
> 
> On waking up, they use the last bit of HSPRG0 and conclude that they
> don't need to restore resources before executing the guest entry code.
> 
> Thus, the HSPRG0 trick help the secondary threads avoid spending
> extra cycles restoring the SLBs and per-thread SPRs.

Ah, thanks. Until now I didn't really understand why that case was
optimimized. My mistake and apologies for assuming it was not a good
reason :)


> > Remove this complexity and assume winkle sleeps always require a state
> > restore. This speedup could be made entirely contained within the winkle
> > idle code by counting per-core winkles and setting a thread bitmap when
> > all have gone to winkle.  
> 
> This is a good idea! We are anyway taking the lock in pnv_wakeup_tb_loss()
> to check if we are the first thread in the core waking up from a deep-state.
> 
> We can check another bitmap if we are the first thread waking up from winkle
> and set cr4 to the result of that comparison while holding the lock.
> 
> That shouldn't cost us much, at least for the fake-wakeup-from-winkle
> for KVM case alluded to above.
> 
> Otherwise, the patch looks good to me.

Okay, I had a half-done patch for this. I'll finish it and send it
out for review.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-02-16 18:39 ` [PATCH 5/5] powerpc/64s: fix POWER9 machine check handler from stop state Nicholas Piggin
@ 2017-02-28 17:15   ` Gautham R Shenoy
  2017-03-08 12:19     ` Nicholas Piggin
  0 siblings, 1 reply; 14+ messages in thread
From: Gautham R Shenoy @ 2017-02-28 17:15 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, ego, Vaidyanathan Srinivasan

Hi Nick,

On Fri, Feb 17, 2017 at 12:09 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> The ISA specifies power save wakeup can cause a machine check interrupt.
> The machine check handler currently has code to handle that for POWER8,
> but POWER9 crashes when trying to execute the P8 style sleep
> instructions.
>
> So queue up the machine check, then call into the idle code to wake up
> as the system reset interrupt does, rather than attempting to sleep
> again without going through the main idle path.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kernel/exceptions-64s.S | 71 ++++++++++++++++++------------------
>  1 file changed, 36 insertions(+), 35 deletions(-)
>
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 5f775783f744..0388843c8d12 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -329,6 +329,35 @@ EXC_COMMON_BEGIN(machine_check_common)
>         /* restore original r1. */                      \
>         ld      r1,GPR1(r1)
>
> +#ifdef CONFIG_PPC_P7_NAP
> +EXC_COMMON_BEGIN(machine_check_idle_common)
> +       bl      machine_check_queue_event
> +       /*
> +        * Queue the machine check, then reload SRR1 and use it to set
> +        * CR3 according to pnv_powersave_wakeup convention.
> +        */
> +       ld      r12,_MSR(r1)
> +       rlwinm  r11,r12,47-31,30,31
> +       cmpwi   cr3,r11,2
> +
> +       /*
> +        * Now have to make SRR1 wake up reason look like a system reset
> +        * interrupt. Put 0xf in there, which is reserved (and does not
> +        * match HMI).

The only places where the wakeup reason is presently checked on the way out
of idle-exit are in KVM guest entry path in kvmppc_check_wake_reason()
 and on the CPU-Hotplug exit path pnv_smp_cpu_kill_self(). In both places
we do a positive check for EE, Doorbell, HVEE . The kvm case is also
interested in
HMI. We ignore all the other reasons at the moment.

So this should be fine.
> +        */
> +       li      r11,0xf
> +       insrdi  r12,r11,4,45


Shouldn't this be insrdi r12,r11,4,42? The exception bits are 42:45.
I always have trouble wrapping my head around these nifty
rotate-shift-mask-insert instructions. So I might as well be wrong!


> +       mtspr   r12,SPRN_SRR1
> +       std     r12,_MSR(r1)
> +
> +       /*
> +        * Decrement MCE nesting after finishing with the stack.
> +        */
> +       lhz     r11,PACA_IN_MCE(r13)
> +       subi    r11,r11,1
> +       sth     r11,PACA_IN_MCE(r13)
> +       b       pnv_powersave_wakeup
> +#endif
>         /*
>          * Handle machine check early in real mode. We come here with
>          * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
> @@ -341,6 +370,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>         bl      machine_check_early
>         std     r3,RESULT(r1)   /* Save result */
>         ld      r12,_MSR(r1)
> +
>  #ifdef CONFIG_PPC_P7_NAP
>         /*
>          * Check if thread was in power saving mode. We come here when any
> @@ -351,43 +381,14 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
>          *
>          * Go back to nap/sleep/winkle mode again if (b) is true.
>          */
> -       rlwinm. r11,r12,47-31,30,31     /* Was it in power saving mode? */
> -       beq     4f                      /* No, it wasn't */
> -       /* Thread was in power saving mode. Go back to nap again. */
> -       cmpwi   r11,2
> -       blt     3f
> -       /* Supervisor/Hypervisor state loss */
> -       li      r0,1
> -       stb     r0,PACA_NAPSTATELOST(r13)
> -3:     bl      machine_check_queue_event
> -       MACHINE_CHECK_HANDLER_WINDUP
> -       GET_PACA(r13)
> -       ld      r1,PACAR1(r13)
> -       /*
> -        * Check what idle state this CPU was in and go back to same mode
> -        * again.
> -        */
> -       lbz     r3,PACA_THREAD_IDLE_STATE(r13)
> -       cmpwi   r3,PNV_THREAD_NAP
> -       bgt     10f
> -       IDLE_STATE_ENTER_SEQ(PPC_NAP)
> -       /* No return */
> -10:
> -       cmpwi   r3,PNV_THREAD_SLEEP
> -       bgt     2f
> -       IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
> -       /* No return */
> -
> -2:
> -       /*
> -        * Go back to winkle. Please note that this thread was woken up in
> -        * machine check from winkle and have not restored the per-subcore
> -        * state.
> -        */
> -       IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
> -       /* No return */
> +       BEGIN_FTR_SECTION
> +       rlwinm. r11,r12,47-31,30,31
> +       beq-    4f
> +       BRANCH_TO_COMMON(r10, machine_check_idle_common)
>  4:
> +       END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
>  #endif
> +
>         /*
>          * Check if we are coming from hypervisor userspace. If yes then we
>          * continue in host kernel in V mode to deliver the MC event.
> --
> 2.11.0
>

Otherwise, the patch looks correct to me.
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

-- 
Thanks and Regards
gautham.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-02-28 17:15   ` Gautham R Shenoy
@ 2017-03-08 12:19     ` Nicholas Piggin
  2017-03-10 10:34       ` Gautham R Shenoy
  0 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2017-03-08 12:19 UTC (permalink / raw)
  To: Gautham R Shenoy; +Cc: ego, linuxppc-dev, Vaidyanathan Srinivasan

Hi Gautham,

I'm just getting back to this. Sorry for the late reply, and
thanks for the reviews.

On Tue, 28 Feb 2017 22:45:46 +0530
Gautham R Shenoy <ego.lkml@gmail.com> wrote:

> Hi Nick,
> 
> On Fri, Feb 17, 2017 at 12:09 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> > The ISA specifies power save wakeup can cause a machine check interrupt.
> > The machine check handler currently has code to handle that for POWER8,
> > but POWER9 crashes when trying to execute the P8 style sleep
> > instructions.
> >
> > So queue up the machine check, then call into the idle code to wake up
> > as the system reset interrupt does, rather than attempting to sleep
> > again without going through the main idle path.
> >
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/kernel/exceptions-64s.S | 71 ++++++++++++++++++------------------
> >  1 file changed, 36 insertions(+), 35 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > index 5f775783f744..0388843c8d12 100644
> > --- a/arch/powerpc/kernel/exceptions-64s.S
> > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > @@ -329,6 +329,35 @@ EXC_COMMON_BEGIN(machine_check_common)
> >         /* restore original r1. */                      \
> >         ld      r1,GPR1(r1)
> >
> > +#ifdef CONFIG_PPC_P7_NAP
> > +EXC_COMMON_BEGIN(machine_check_idle_common)
> > +       bl      machine_check_queue_event
> > +       /*
> > +        * Queue the machine check, then reload SRR1 and use it to set
> > +        * CR3 according to pnv_powersave_wakeup convention.
> > +        */
> > +       ld      r12,_MSR(r1)
> > +       rlwinm  r11,r12,47-31,30,31
> > +       cmpwi   cr3,r11,2
> > +
> > +       /*
> > +        * Now have to make SRR1 wake up reason look like a system reset
> > +        * interrupt. Put 0xf in there, which is reserved (and does not
> > +        * match HMI).  
> 
> The only places where the wakeup reason is presently checked on the way out
> of idle-exit are in KVM guest entry path in kvmppc_check_wake_reason()
>  and on the CPU-Hotplug exit path pnv_smp_cpu_kill_self(). In both places
> we do a positive check for EE, Doorbell, HVEE . The kvm case is also
> interested in
> HMI. We ignore all the other reasons at the moment.
> 
> So this should be fine.

Okay, thanks for confirming. We will have to be careful about this I
suppose if the wakeup reasons are expanded. I'll make a note of it
in asm/reg.h with the WAKEMASK definitions.

> > +        */
> > +       li      r11,0xf
> > +       insrdi  r12,r11,4,45  
> 
> 
> Shouldn't this be insrdi r12,r11,4,42? The exception bits are 42:45.
> I always have trouble wrapping my head around these nifty
> rotate-shift-mask-insert instructions. So I might as well be wrong!

Ah I think you're right, good catch. Maybe oris r12,r12,0x3c is a better
choice than that insrdi?


> 
> Otherwise, the patch looks correct to me.
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

Very much appreciate the reviews. I'm just getting some time to work on
the winkle count patch, so I'll repost with your suggestions when that's
done.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-03-08 12:19     ` Nicholas Piggin
@ 2017-03-10 10:34       ` Gautham R Shenoy
  0 siblings, 0 replies; 14+ messages in thread
From: Gautham R Shenoy @ 2017-03-10 10:34 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Gautham R Shenoy, ego, linuxppc-dev, Vaidyanathan Srinivasan

On Wed, Mar 08, 2017 at 10:19:19PM +1000, Nicholas Piggin wrote:
> Hi Gautham,
> 
> I'm just getting back to this. Sorry for the late reply, and
> thanks for the reviews.

No problems.

> 
[..snip..]
> > > +#ifdef CONFIG_PPC_P7_NAP
> > > +EXC_COMMON_BEGIN(machine_check_idle_common)
> > > +       bl      machine_check_queue_event
> > > +       /*
> > > +        * Queue the machine check, then reload SRR1 and use it to set
> > > +        * CR3 according to pnv_powersave_wakeup convention.
> > > +        */
> > > +       ld      r12,_MSR(r1)
> > > +       rlwinm  r11,r12,47-31,30,31
> > > +       cmpwi   cr3,r11,2
> > > +
> > > +       /*
> > > +        * Now have to make SRR1 wake up reason look like a system reset
> > > +        * interrupt. Put 0xf in there, which is reserved (and does not
> > > +        * match HMI).  
> > 
> > The only places where the wakeup reason is presently checked on the way out
> > of idle-exit are in KVM guest entry path in kvmppc_check_wake_reason()
> >  and on the CPU-Hotplug exit path pnv_smp_cpu_kill_self(). In both places
> > we do a positive check for EE, Doorbell, HVEE . The kvm case is also
> > interested in
> > HMI. We ignore all the other reasons at the moment.
> > 
> > So this should be fine.
> 
> Okay, thanks for confirming. We will have to be careful about this I
> suppose if the wakeup reasons are expanded. I'll make a note of it
> in asm/reg.h with the WAKEMASK definitions.

Sounds reasonable.

> 
> > > +        */
> > > +       li      r11,0xf
> > > +       insrdi  r12,r11,4,45  
> > 
> > 
> > Shouldn't this be insrdi r12,r11,4,42? The exception bits are 42:45.
> > I always have trouble wrapping my head around these nifty
> > rotate-shift-mask-insert instructions. So I might as well be wrong!
> 
> Ah I think you're right, good catch. Maybe oris r12,r12,0x3c is a better
> choice than that insrdi?

Perhaps oris is a better choice in this case since we are anyway
setting every bit in 42:45 range. Not sure if it will save any cycles,
but it will certainly reduce an instruction!

> 
> 
> > 
> > Otherwise, the patch looks correct to me.
> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> 
> Very much appreciate the reviews. I'm just getting some time to work on
> the winkle count patch, so I'll repost with your suggestions when that's
> done.
>

Looking forward to the new version!


> Thanks,
> Nick
> 
--
Thanks and Regards
gautham.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-03-10 10:35 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-16 18:38 [RFC][PATCH 0/5] power saving wakeup changes for POWER9 MCE Nicholas Piggin
2017-02-16 18:38 ` [PATCH 1/5] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
2017-02-28 11:40   ` Gautham R Shenoy
2017-02-16 18:38 ` [PATCH 2/5] powerpc/64: stop using bit in HSPRG0 to test winkle Nicholas Piggin
2017-02-28 15:08   ` Gautham R Shenoy
2017-02-28 15:36     ` Nicholas Piggin
2017-02-16 18:38 ` [PATCH 3/5] powerpc/64s: use alternative feature patching Nicholas Piggin
2017-02-28 15:12   ` Gautham R Shenoy
2017-02-16 18:39 ` [PATCH 4/5] powerpc/64s: consolidate pnv_restore_hyp_resource into pnv_powersave_wakeup Nicholas Piggin
2017-02-28 15:26   ` Gautham R Shenoy
2017-02-16 18:39 ` [PATCH 5/5] powerpc/64s: fix POWER9 machine check handler from stop state Nicholas Piggin
2017-02-28 17:15   ` Gautham R Shenoy
2017-03-08 12:19     ` Nicholas Piggin
2017-03-10 10:34       ` Gautham R Shenoy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.