* [PATCH 0/7][v3] idle fixes and changes for POWER8 and POWER9
From: Nicholas Piggin @ 2017-03-20  6:01 UTC
  To: linuxppc-dev
  Cc: Nicholas Piggin, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

Since last time:

- Commented machine check handler wakeup to describe why registers are
  not lost.
- Dropped patch to make PACA_THREAD_IDLE_STATE POWER8-only because
  Gautham will use it for POWER9 DD1.
- Removed some leftover debug cruft noticed by Gautham, and re-tested
  on POWER8. Added some extra comments to hopefully try to illustrate
  the idle state changes a bit better.
  
Needs:
- Ack from Gautham for patch 7
- Ack from Mahesh for patch 4

Thanks,
Nick

Nicholas Piggin (7):
  powerpc/64s: move remaining system reset idle code into idle_book3s.S
  powerpc/64s: stop using bit in HSPRG0 to test winkle
  powerpc/64s: use alternative feature patching
  powerpc/64s: fix POWER9 machine check handler from stop state
  powerpc/64s: idle expand core idle state bits
  powerpc/64s: idle do not hold reservation longer than required
  powerpc/64s: idle POWER8 avoid full state loss recovery where possible

 arch/powerpc/include/asm/cpuidle.h       |  32 ++++-
 arch/powerpc/include/asm/exception-64s.h |  13 +-
 arch/powerpc/include/asm/reg.h           |   1 +
 arch/powerpc/kernel/exceptions-64s.S     | 123 +++++++----------
 arch/powerpc/kernel/idle_book3s.S        | 227 ++++++++++++++++++++++---------
 arch/powerpc/platforms/powernv/idle.c    |  13 --
 6 files changed, 242 insertions(+), 167 deletions(-)

-- 
2.11.0


* [PATCH 1/7] powerpc/64s: move remaining system reset idle code into idle_book3s.S
From: Nicholas Piggin @ 2017-03-20  6:01 UTC
  To: linuxppc-dev
  Cc: Nicholas Piggin, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

Should be no functional change.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 26 +------------
 arch/powerpc/kernel/idle_book3s.S    | 73 ++++++++++++++++++++++++------------
 2 files changed, 49 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0f54b4844399..896b439655eb 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -126,31 +126,7 @@ EXC_VIRT_NONE(0x4100, 0x100)
 
 #ifdef CONFIG_PPC_P7_NAP
 EXC_COMMON_BEGIN(system_reset_idle_common)
-BEGIN_FTR_SECTION
-	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-	bl	pnv_restore_hyp_resource
-
-	li	r0,PNV_THREAD_RUNNING
-	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	BRANCH_TO_KVM(r10, kvm_start_guest)
-1:
-#endif
-
-	/* Return SRR1 from power7_nap() */
-	mfspr	r3,SPRN_SRR1
-	blt	cr3,2f
-	b	pnv_wakeup_loss
-2:	b	pnv_wakeup_noloss
+	b	pnv_powersave_wakeup
 #endif
 
 EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 6fd08219248d..80ca3fd37b4e 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -20,6 +20,7 @@
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
 #include <asm/cpuidle.h>
+#include <asm/exception-64s.h>
 #include <asm/book3s/64/mmu-hash.h>
 #include <asm/mmu.h>
 
@@ -113,7 +114,7 @@ core_idle_lock_held:
  *
  * Address to 'rfid' to in r5
  */
-_GLOBAL(pnv_powersave_common)
+pnv_powersave_common:
 	/* Use r3 to pass state nap/sleep/winkle */
 	/* NAP is a state loss, we create a regs frame on the
 	 * stack, fill it up with the state we care about and
@@ -188,8 +189,8 @@ pnv_enter_arch207_idle_mode:
 	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
 	/* MUST occur in real mode, i.e. with the MMU off,    */
 	/* and the MMU must stay off until we clear this flag */
-	/* and test HSTATE_HWTHREAD_REQ(r13) in the system    */
-	/* reset interrupt vector in exceptions-64s.S.        */
+	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
+	/* pnv_powersave_wakeup in this file.                 */
 	/* The reason is that another thread can switch the   */
 	/* MMU to a guest context whenever this flag is set   */
 	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
@@ -375,15 +376,42 @@ _GLOBAL(power9_idle_stop)
 	li	r4,1
 	b	pnv_powersave_common
 	/* No return */
+
+.global pnv_powersave_wakeup
+pnv_powersave_wakeup:
+BEGIN_FTR_SECTION
+	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+	bl	pnv_restore_hyp_resource
+
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beq	1f
+	BRANCH_TO_KVM(r10, kvm_start_guest)
+1:
+#endif
+
+	/* Return SRR1 from power7_nap() */
+	mfspr	r3,SPRN_SRR1
+	blt	cr3,pnv_wakeup_noloss
+	b	pnv_wakeup_loss
+
 /*
- * Called from reset vector. Check whether we have woken up with
- * hypervisor state loss. If yes, restore hypervisor state and return
- * back to reset vector.
+ * Check whether we have woken up with hypervisor state loss.
+ * If yes, restore hypervisor state and return back to link.
  *
  * r13 - Contents of HSPRG0
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
-_GLOBAL(pnv_restore_hyp_resource)
+pnv_restore_hyp_resource:
 BEGIN_FTR_SECTION
 	ld	r2,PACATOC(r13);
 	/*
@@ -400,12 +428,9 @@ BEGIN_FTR_SECTION
 	 */
 	rldicl  r5,r5,4,60
 	cmpd	cr4,r5,r4
-	bge	cr4,pnv_wakeup_tb_loss
-	/*
-	 * Waking up without hypervisor state loss. Return to
-	 * reset vector
-	 */
-	blr
+	bge	cr4,pnv_wakeup_tb_loss /* returns to caller */
+
+	blr	/* Waking up without hypervisor state loss. */
 
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
@@ -433,8 +458,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	 */
 	bgt	cr3,.
 
-	blr	/* Return back to System Reset vector from where
-		   pnv_restore_hyp_resource was invoked */
+	blr	/* Waking up without hypervisor state loss */
 
 /*
  * Called if waking up from idle state which can cause either partial or
@@ -446,7 +470,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  * cr3 - gt if waking up with partial/complete hypervisor state loss
  * cr4 - gt or eq if waking up from complete hypervisor state loss.
  */
-_GLOBAL(pnv_wakeup_tb_loss)
+pnv_wakeup_tb_loss:
 	ld	r1,PACAR1(r13)
 	/*
 	 * Before entering any idle state, the NVGPRs are saved in the stack.
@@ -466,12 +490,11 @@ _GLOBAL(pnv_wakeup_tb_loss)
 	stb	r0,PACA_NAPSTATELOST(r13)
 
 	/*
-	 *
 	 * Save SRR1 and LR in NVGPRs as they might be clobbered in
-	 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
-	 * to determine the wakeup reason if we branch to kvm_start_guest. LR
-	 * is required to return back to reset vector after hypervisor state
-	 * restore is complete.
+	 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required to
+	 * determine the wakeup reason if we branch to kvm_start_guest. LR is
+	 * required to return back to caller after hypervisor state restore is
+	 * complete.
 	 */
 	mflr	r17
 	mfspr	r16,SPRN_SRR1
@@ -651,8 +674,7 @@ hypervisor_state_restored:
 
 	mtspr	SPRN_SRR1,r16
 	mtlr	r17
-	blr	/* Return back to System Reset vector from where
-		   pnv_restore_hyp_resource was invoked */
+	blr		/* return to pnv_powersave_wakeup */
 
 fastsleep_workaround_at_exit:
 	li	r3,1
@@ -664,7 +686,8 @@ fastsleep_workaround_at_exit:
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
  */
-_GLOBAL(pnv_wakeup_loss)
+.global pnv_wakeup_loss
+pnv_wakeup_loss:
 	ld	r1,PACAR1(r13)
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
@@ -684,7 +707,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
  */
-_GLOBAL(pnv_wakeup_noloss)
+pnv_wakeup_noloss:
 	lbz	r0,PACA_NAPSTATELOST(r13)
 	cmpwi	r0,0
 	bne	pnv_wakeup_loss
-- 
2.11.0


* [PATCH 2/7] powerpc/64s: stop using bit in HSPRG0 to test winkle
From: Nicholas Piggin @ 2017-03-20  6:01 UTC
  To: linuxppc-dev
  Cc: Nicholas Piggin, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

The POWER8 idle code has a neat trick of programming the power on engine
to restore a low bit into HSPRG0, so idle wakeup code can test and see
if it has been programmed this way and therefore lost all state. Restore
time can be reduced if winkle has not been reached.

However this messes with our r13 PACA pointer, and requires HSPRG0 to
be written to. It also optimizes the slowest and most uncommon case at
the expense of another SPR write in the common nap state wakeup.

Remove this complexity and assume winkle sleeps always require a state
restore. This speedup could be made entirely contained within the winkle
idle code by counting per-core winkles and setting a thread bitmap when
all have gone to winkle.
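
For illustration, here is a minimal C sketch of the trick this patch
removes (names are hypothetical; the real code programs the SLW engine
in powernv/idle.c and does the test/strip in early exception asm):

#include <stdint.h>
#include <stdbool.h>

/*
 * The PACA is at least 8-byte aligned, so the pointer kept in HSPRG0
 * has its low bits clear and one can be borrowed as a "restored from
 * winkle" flag by the power on engine:
 */
static uint64_t slw_restore_value(uint64_t paca_addr)
{
	return paca_addr | 1;	/* programmed into SLW for deep sleep */
}

/* ...which every idle wakeup must then pay for, before r13 (a plain
 * variable here) can be trusted as the PACA pointer: */
static bool wakeup_strip_winkle_bit(uint64_t *r13)
{
	bool from_winkle = *r13 & 1;

	*r13 &= ~1ULL;		/* strip the flag, write back HSPRG0 */
	return from_winkle;
}

The counting alternative suggested above is what patch 7 of this series
implements.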

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/exception-64s.h | 13 ++-----------
 arch/powerpc/kernel/exceptions-64s.S     | 21 +++------------------
 arch/powerpc/kernel/idle_book3s.S        | 23 ++++++++---------------
 arch/powerpc/platforms/powernv/idle.c    | 13 -------------
 4 files changed, 13 insertions(+), 57 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 14752eee3d0c..f5fe7c901b37 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -167,17 +167,14 @@ BEGIN_FTR_SECTION_NESTED(943)						\
 	std	ra,offset(r13);						\
 END_FTR_SECTION_NESTED(ftr,ftr,943)
 
-#define EXCEPTION_PROLOG_0_PACA(area)					\
+#define EXCEPTION_PROLOG_0(area)					\
+	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 */			\
 	OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);			\
 	HMT_MEDIUM;							\
 	std	r10,area+EX_R10(r13);	/* save r10 - r12 */		\
 	OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
 
-#define EXCEPTION_PROLOG_0(area)					\
-	GET_PACA(r13);							\
-	EXCEPTION_PROLOG_0_PACA(area)
-
 #define __EXCEPTION_PROLOG_1(area, extra, vec)				\
 	OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);		\
 	OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);		\
@@ -208,12 +205,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	EXCEPTION_PROLOG_1(area, extra, vec);				\
 	EXCEPTION_PROLOG_PSERIES_1(label, h);
 
-/* Have the PACA in r13 already */
-#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec)	\
-	EXCEPTION_PROLOG_0_PACA(area);					\
-	EXCEPTION_PROLOG_1(area, extra, vec);				\
-	EXCEPTION_PROLOG_PSERIES_1(label, h);
-
 #define __KVMTEST(h, n)							\
 	lbz	r10,HSTATE_IN_GUEST(r13);				\
 	cmpwi	r10,0;							\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 896b439655eb..bef9b04a3b2d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -116,9 +116,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
 
 EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
 	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	clrrdi	r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
-	EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
 				 IDLETEST, 0x100)
 
 EXC_REAL_END(system_reset, 0x100, 0x100)
@@ -148,14 +146,6 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
 	 * vector
 	 */
 	SET_SCRATCH0(r13)		/* save r13 */
-	/*
-	 * Running native on arch 2.06 or later, we may wakeup from winkle
-	 * inside machine check. If yes, then last bit of HSPRG0 would be set
-	 * to 1. Hence clear it unconditionally.
-	 */
-	GET_PACA(r13)
-	clrrdi	r13,r13,1
-	SET_PACA(r13)
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
 	b	machine_check_powernv_early
@@ -339,7 +329,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
 	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn;t */
+	beq	4f			/* No, it wasn't */
 	/* Thread was in power saving mode. Go back to nap again. */
 	cmpwi	r11,2
 	blt	3f
@@ -369,13 +359,8 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	/*
 	 * Go back to winkle. Please note that this thread was woken up in
 	 * machine check from winkle and have not restored the per-subcore
-	 * state. Hence before going back to winkle, set last bit of HSPRG0
-	 * to 1. This will make sure that if this thread gets woken up
-	 * again at reset vector 0x100 then it will get chance to restore
-	 * the subcore state.
+	 * state.
 	 */
-	ori	r13,r13,1
-	SET_PACA(r13)
 	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
 	/* No return */
 4:
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 80ca3fd37b4e..9f21f84ae49a 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -377,11 +377,12 @@ _GLOBAL(power9_idle_stop)
 	b	pnv_powersave_common
 	/* No return */
 
+/*
+ * Called from reset vector for powersave wakeups.
+ * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ */
 .global pnv_powersave_wakeup
 pnv_powersave_wakeup:
-BEGIN_FTR_SECTION
-	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	bl	pnv_restore_hyp_resource
 
 	li	r0,PNV_THREAD_RUNNING
@@ -408,12 +409,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
  * Check whether we have woken up with hypervisor state loss.
  * If yes, restore hypervisor state and return back to link.
  *
- * r13 - Contents of HSPRG0
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
 pnv_restore_hyp_resource:
-BEGIN_FTR_SECTION
 	ld	r2,PACATOC(r13);
+
+BEGIN_FTR_SECTION
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
 	 * are waking up from deep idle state
@@ -436,19 +437,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	/*
 	 * POWER ISA 2.07 or less.
-	 * Check if last bit of HSPGR0 is set. This indicates whether we are
-	 * waking up from winkle.
+	 * Check if we slept with winkle.
 	 */
-	clrldi	r5,r13,63
-	clrrdi	r13,r13,1
-
-	/* Now that we are sure r13 is corrected, load TOC */
-	ld	r2,PACATOC(r13);
-	cmpwi	cr4,r5,1
-	mtspr	SPRN_HSPRG0,r13
-
 	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
 	cmpwi   cr2,r0,PNV_THREAD_NAP
+	cmpwi   cr4,r0,PNV_THREAD_WINKLE
 	bgt     cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
 
 	/*
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 4ee837e6391a..f9fe0ff0f03a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -53,19 +53,6 @@ static int pnv_save_sprs_for_deep_states(void)
 		uint64_t pir = get_hard_smp_processor_id(cpu);
 		uint64_t hsprg0_val = (uint64_t)&paca[cpu];
 
-		if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
-			/*
-			 * HSPRG0 is used to store the cpu's pointer to paca.
-			 * Hence last 3 bits are guaranteed to be 0. Program
-			 * slw to restore HSPRG0 with 63rd bit set, so that
-			 * when a thread wakes up at 0x100 we can use this bit
-			 * to distinguish between fastsleep and deep winkle.
-			 * This is not necessary with stop/psscr since PLS
-			 * field of psscr indicates which state we are waking
-			 * up from.
-			 */
-			hsprg0_val |= 1;
-		}
 		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
 		if (rc != 0)
 			return rc;
-- 
2.11.0


* [PATCH 3/7] powerpc/64s: use alternative feature patching
From: Nicholas Piggin @ 2017-03-20  6:01 UTC
  To: linuxppc-dev
  Cc: Nicholas Piggin, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

This reduces the number of nops for POWER8.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/idle_book3s.S | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 9f21f84ae49a..819e64104469 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -383,7 +383,11 @@ _GLOBAL(power9_idle_stop)
  */
 .global pnv_powersave_wakeup
 pnv_powersave_wakeup:
-	bl	pnv_restore_hyp_resource
+BEGIN_FTR_SECTION
+	bl	pnv_restore_hyp_resource_arch300
+FTR_SECTION_ELSE
+	bl	pnv_restore_hyp_resource_arch207
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 
 	li	r0,PNV_THREAD_RUNNING
 	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
@@ -411,14 +415,13 @@ pnv_powersave_wakeup:
  *
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
-pnv_restore_hyp_resource:
-	ld	r2,PACATOC(r13);
-
-BEGIN_FTR_SECTION
+pnv_restore_hyp_resource_arch300:
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
 	 * are waking up from deep idle state
 	 */
+	ld	r2,PACATOC(r13);
+
 	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
 	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
 
@@ -433,12 +436,14 @@ BEGIN_FTR_SECTION
 
 	blr	/* Waking up without hypervisor state loss. */
 
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
+/* Same calling convention as arch300 */
+pnv_restore_hyp_resource_arch207:
 	/*
 	 * POWER ISA 2.07 or less.
 	 * Check if we slept with winkle.
 	 */
+	ld	r2,PACATOC(r13);
+
 	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
 	cmpwi   cr2,r0,PNV_THREAD_NAP
 	cmpwi   cr4,r0,PNV_THREAD_WINKLE
-- 
2.11.0


* [PATCH 4/7] powerpc/64s: fix POWER9 machine check handler from stop state
From: Nicholas Piggin @ 2017-03-20  6:01 UTC
  To: linuxppc-dev
  Cc: Nicholas Piggin, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

The ISA specifies power save wakeup can cause a machine check interrupt.
The machine check handler currently has code to handle that for POWER8,
but POWER9 crashes when trying to execute the P8 style sleep
instructions.

So queue up the machine check, then call into the idle code to wake up
as the system reset interrupt does, rather than attempting to sleep
again without going through the main idle path.
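
As a rough C-level sketch, the new flow is (hedged: the real code is
asm in exceptions-64s.S, and pnv_powersave_wakeup is an asm label
rather than a callable C function):

#define SRR1_WAKEMCE_RESVD	0x003c0000	/* added by this patch */

void machine_check_queue_event(void);		/* real kernel function */
void pnv_powersave_wakeup(unsigned long srr1);	/* asm label, shown as C */

void machine_check_from_power_save(unsigned long srr1)
{
	/* Defer full handling: queue the event for later delivery */
	machine_check_queue_event();

	/*
	 * Tag SRR1 with a reserved wake-reason encoding so the common
	 * wakeup path can tell this apart from ordinary wake sources,
	 * then exit through the same path system reset wakeups use.
	 */
	srr1 |= SRR1_WAKEMCE_RESVD;
	pnv_powersave_wakeup(srr1);	/* no return */
}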

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/reg.h       |  1 +
 arch/powerpc/kernel/exceptions-64s.S | 80 ++++++++++++++++++++----------------
 2 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fc879fd6bdae..8bbdfacce970 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -656,6 +656,7 @@
 #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
 #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
+#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
 #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index bef9b04a3b2d..0d379bd36808 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -306,6 +306,44 @@ EXC_COMMON_BEGIN(machine_check_common)
 	/* restore original r1. */			\
 	ld	r1,GPR1(r1)
 
+#ifdef CONFIG_PPC_P7_NAP
+EXC_COMMON_BEGIN(machine_check_idle_common)
+	bl	machine_check_queue_event
+	/*
+	 * Queue the machine check, then reload SRR1 and use it to set
+	 * CR3 according to pnv_powersave_wakeup convention.
+	 */
+	ld	r12,_MSR(r1)
+	rlwinm	r11,r12,47-31,30,31
+	cmpwi	cr3,r11,2
+
+	li	r11,1
+	stb	r11,PACA_NAPSTATELOST(r13)
+
+	/*
+	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
+	 * system reset wakeup code.
+	 */
+	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
+	mtspr	SPRN_SRR1,r12
+	std	r12,_MSR(r1)
+
+	/*
+	 * We have not used any non-volatile GPRs here, and as a rule
+	 * most exception code including machine check does not. Therefore
+	 * PACA_NAPSTATELOST does not need to be set. Non-volatile cr3 has
+	 * been set, above, but the idle machine
+	 * check code does not
+	 * Ask the idle code to restore non-volatile GPR registers too.
+	 * Machine check exception code
+	 *
+	 * Decrement MCE nesting after finishing with the stack.
+	 */
+	lhz	r11,PACA_IN_MCE(r13)
+	subi	r11,r11,1
+	sth	r11,PACA_IN_MCE(r13)
+	b	pnv_powersave_wakeup
+#endif
 	/*
 	 * Handle machine check early in real mode. We come here with
 	 * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
@@ -318,6 +356,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
+
 #ifdef	CONFIG_PPC_P7_NAP
 	/*
 	 * Check if thread was in power saving mode. We come here when any
@@ -328,43 +367,14 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 *
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
-	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn't */
-	/* Thread was in power saving mode. Go back to nap again. */
-	cmpwi	r11,2
-	blt	3f
-	/* Supervisor/Hypervisor state loss */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-3:	bl	machine_check_queue_event
-	MACHINE_CHECK_HANDLER_WINDUP
-	GET_PACA(r13)
-	ld	r1,PACAR1(r13)
-	/*
-	 * Check what idle state this CPU was in and go back to same mode
-	 * again.
-	 */
-	lbz	r3,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	r3,PNV_THREAD_NAP
-	bgt	10f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
-	/* No return */
-10:
-	cmpwi	r3,PNV_THREAD_SLEEP
-	bgt	2f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-	/* No return */
-
-2:
-	/*
-	 * Go back to winkle. Please note that this thread was woken up in
-	 * machine check from winkle and have not restored the per-subcore
-	 * state.
-	 */
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-	/* No return */
+	BEGIN_FTR_SECTION
+	rlwinm.	r11,r12,47-31,30,31
+	beq-	4f
+	BRANCH_TO_COMMON(r10, machine_check_idle_common)
 4:
+	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
+
 	/*
 	 * Check if we are coming from hypervisor userspace. If yes then we
 	 * continue in host kernel in V mode to deliver the MC event.
-- 
2.11.0


* [PATCH 5/7] powerpc/64s: idle expand core idle state bits
From: Nicholas Piggin @ 2017-03-20  6:01 UTC
  To: linuxppc-dev
  Cc: Nicholas Piggin, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

In preparation for adding more bits to the core idle state word,
move the lock bit up, and unlock by flipping the lock bit rather
than masking off all but the thread bits.

Add branch hints for atomic operations while we're here.
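
In C terms the unlock change looks roughly like this (a sketch only;
the asm does it with xoris/lwsync/stw):

#define PNV_CORE_IDLE_LOCK_BIT		0x10000000
#define PNV_CORE_IDLE_THREAD_BITS	0x000000FF

/* Old unlock: mask the word down to the thread bits, which would also
 * destroy any new fields added above them. */
static unsigned int unlock_old(unsigned int state)
{
	return state & PNV_CORE_IDLE_THREAD_BITS;
}

/* New unlock: flip only the lock bit, preserving everything else and
 * making room for the fields the later patches add. */
static unsigned int unlock_new(unsigned int state)
{
	return state ^ PNV_CORE_IDLE_LOCK_BIT;
}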

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/cpuidle.h |  4 ++--
 arch/powerpc/kernel/idle_book3s.S  | 33 +++++++++++++++++----------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 155731557c9b..b9d9f960dffd 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -7,8 +7,8 @@
 #define PNV_THREAD_NAP                  1
 #define PNV_THREAD_SLEEP                2
 #define PNV_THREAD_WINKLE               3
-#define PNV_CORE_IDLE_LOCK_BIT          0x100
-#define PNV_CORE_IDLE_THREAD_BITS       0x0FF
+#define PNV_CORE_IDLE_LOCK_BIT          0x10000000
+#define PNV_CORE_IDLE_THREAD_BITS       0x000000FF
 
 /*
  * ============================ NOTE =================================
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 819e64104469..d3cc6c3e97bc 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -95,12 +95,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 core_idle_lock_held:
 	HMT_LOW
 3:	lwz	r15,0(r14)
-	andi.   r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	andis.	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	bne	3b
 	HMT_MEDIUM
 	lwarx	r15,0,r14
-	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bne	core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bne-	core_idle_lock_held
 	blr
 
 /*
@@ -213,8 +213,8 @@ pnv_enter_arch207_idle_mode:
 lwarx_loop1:
 	lwarx	r15,0,r14
 
-	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bnel	core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bnel-	core_idle_lock_held
 
 	andc	r15,r15,r7			/* Clear thread bit */
 
@@ -241,7 +241,7 @@ common_enter: /* common code for all the threads entering sleep or winkle */
 	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
 
 fastsleep_workaround_at_entry:
-	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	stwcx.	r15,0,r14
 	bne-	lwarx_loop1
 	isync
@@ -251,10 +251,10 @@ fastsleep_workaround_at_entry:
 	li	r4,1
 	bl	opal_config_cpu_idle_state
 
-	/* Clear Lock bit */
-	li	r0,0
+	/* Unlock */
+	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	lwsync
-	stw	r0,0(r14)
+	stw	r15,0(r14)
 	b	common_enter
 
 enter_winkle:
@@ -302,8 +302,8 @@ power_enter_stop:
 
 lwarx_loop_stop:
 	lwarx   r15,0,r14
-	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bnel    core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bnel-	core_idle_lock_held
 	andc    r15,r15,r7                      /* Clear thread bit */
 
 	stwcx.  r15,0,r14
@@ -504,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
 lwarx_loop2:
 	lwarx	r15,0,r14
-	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	/*
 	 * Lock bit is set in one of the 2 cases-
 	 * a. In the sleep/winkle enter path, the last thread is executing
@@ -513,9 +513,10 @@ lwarx_loop2:
 	 * workaround undo code or resyncing timebase or restoring context
 	 * In either case loop until the lock bit is cleared.
 	 */
-	bnel	core_idle_lock_held
+	bnel-	core_idle_lock_held
 
-	cmpwi	cr2,r15,0
+	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
+	cmpwi	cr2,r9,0
 
 	/*
 	 * At this stage
@@ -524,7 +525,7 @@ lwarx_loop2:
 	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
 	 */
 
-	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	stwcx.	r15,0,r14
 	bne-	lwarx_loop2
 	isync
@@ -614,7 +615,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mtspr	SPRN_WORC,r4
 
 clear_lock:
-	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	lwsync
 	stw	r15,0(r14)
 
-- 
2.11.0


* [PATCH 6/7] powerpc/64s: idle do not hold reservation longer than required
From: Nicholas Piggin @ 2017-03-20  6:01 UTC
  To: linuxppc-dev
  Cc: Nicholas Piggin, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

When taking the core idle state lock, grab it immediately like a
regular lock, rather than adding more tests in there. Holding the lock
keeps it stable, so there is no need to do it while holding the
reservation.
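
The resulting locking pattern, sketched in C (load_reserve,
store_conditional and the barrier/spin helpers are illustrative names
for what the asm does with lwarx/stwcx./isync):

#define PNV_CORE_IDLE_LOCK_BIT	0x10000000

unsigned int load_reserve(unsigned int *p);		/* lwarx */
int store_conditional(unsigned int *p, unsigned int v);	/* stwcx. */
void spin_until_unlocked(void);		/* core_idle_lock_held */
void acquire_barrier(void);		/* isync */

static unsigned int core_idle_state;

static unsigned int core_idle_lock(void)
{
	unsigned int state;

	for (;;) {
		state = load_reserve(&core_idle_state);
		if (state & PNV_CORE_IDLE_LOCK_BIT) {
			spin_until_unlocked();
			continue;	/* retry from the lwarx */
		}
		state |= PNV_CORE_IDLE_LOCK_BIT;
		if (store_conditional(&core_idle_state, state))
			break;		/* stwcx. succeeded */
	}
	acquire_barrier();
	return state;	/* stable snapshot, lock held */
}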

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/idle_book3s.S | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index d3cc6c3e97bc..b085510d697f 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -500,12 +500,12 @@ BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 
-	lbz	r7,PACA_THREAD_MASK(r13)
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
-lwarx_loop2:
-	lwarx	r15,0,r14
-	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	lbz	r7,PACA_THREAD_MASK(r13)
+
 	/*
+	 * Take the core lock to synchronize against other threads.
+	 *
 	 * Lock bit is set in one of the 2 cases-
 	 * a. In the sleep/winkle enter path, the last thread is executing
 	 * fastsleep workaround code.
@@ -513,7 +513,14 @@ lwarx_loop2:
 	 * workaround undo code or resyncing timebase or restoring context
 	 * In either case loop until the lock bit is cleared.
 	 */
+1:
+	lwarx	r15,0,r14
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	bnel-	core_idle_lock_held
+	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	stwcx.	r15,0,r14
+	bne-	1b
+	isync
 
 	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
 	cmpwi	cr2,r9,0
@@ -525,11 +532,6 @@ lwarx_loop2:
 	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
 	 */
 
-	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	stwcx.	r15,0,r14
-	bne-	lwarx_loop2
-	isync
-
 BEGIN_FTR_SECTION
 	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
 	and	r4,r4,r15
-- 
2.11.0


* [PATCH 7/7] powerpc/64s: idle POWER8 avoid full state loss recovery where possible
From: Nicholas Piggin @ 2017-03-20  6:01 UTC
  To: linuxppc-dev
  Cc: Nicholas Piggin, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

If not all threads were in winkle, full state loss recovery is not
necessary and can be avoided. A previous patch removed this optimisation
due to some complexity with the implementation. Re-implement it by
counting the number of threads in winkle with the per-core idle state.
Only restore full state loss if all threads were in winkle.

This has a small window of false positives right before threads execute
winkle and just after they wake up, when the winkle count does not
reflect the true number of threads in winkle. This is not a significant
problem in comparison with even the minimum winkle duration. For
correctness, a false positive is not a problem (only false negatives
would be).
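
Sketched in C, the wake-side accounting is roughly as follows (a
hypothetical helper, not the patch itself; this version includes the
winkle count decrement that Gautham's review below points out is
missing from the in-patch pseudo-code):

#include <stdbool.h>

#define WINKLE_COUNT_SHIFT	16
#define WINKLE_COUNT_MASK	0x000F0000
#define THREAD_WINKLE_BITS	0x0000FF00

/* Runs under the core idle state lock, and only when this thread
 * itself slept in winkle; thread_bit is this thread's bit in the
 * bottom byte of the state word. */
static bool winkle_wake(unsigned int *state, unsigned int thread_bit)
{
	bool winkle_state_lost;

	if ((*state & WINKLE_COUNT_MASK) == (8u << WINKLE_COUNT_SHIFT))
		*state |= THREAD_WINKLE_BITS;	/* all threads winkled */
	*state -= 1u << WINKLE_COUNT_SHIFT;	/* this thread is waking */

	winkle_state_lost = *state & (thread_bit << 8);
	*state &= ~(thread_bit << 8);		/* consume our winkle bit */
	return winkle_state_lost;		/* full restore only if set */
}
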
---
 arch/powerpc/include/asm/cpuidle.h | 32 ++++++++++++--
 arch/powerpc/kernel/idle_book3s.S  | 87 +++++++++++++++++++++++++++++++++++---
 2 files changed, 110 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index b9d9f960dffd..b68a5cd75ae8 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -2,13 +2,39 @@
 #define _ASM_POWERPC_CPUIDLE_H
 
 #ifdef CONFIG_PPC_POWERNV
-/* Used in powernv idle state management */
+/* Thread state used in powernv idle state management */
 #define PNV_THREAD_RUNNING              0
 #define PNV_THREAD_NAP                  1
 #define PNV_THREAD_SLEEP                2
 #define PNV_THREAD_WINKLE               3
-#define PNV_CORE_IDLE_LOCK_BIT          0x10000000
-#define PNV_CORE_IDLE_THREAD_BITS       0x000000FF
+
+/*
+ * Core state used in powernv idle for POWER8.
+ *
+ * The lock bit synchronizes updates to the state, as well as parts of the
+ * sleep/wake code (see kernel/idle_book3s.S).
+ *
+ * Bottom 8 bits track the idle state of each thread. Bit is cleared before
+ * the thread executes an idle instruction (nap/sleep/winkle).
+ *
+ * Then there is winkle tracking. A core does not lose complete state
+ * until every thread is in winkle. So the winkle count field counts the
+ * number of threads in winkle (small window of false positives is okay
+ * around the sleep/wake, so long as there are no false negatives).
+ *
+ * When the winkle count reaches 8 (the COUNT_ALL_BIT becomes set), then
+ * the THREAD_WINKLE_BITS are set, which indicate which threads have not
+ * yet woken from the winkle state.
+ */
+#define PNV_CORE_IDLE_LOCK_BIT			0x10000000
+
+#define PNV_CORE_IDLE_WINKLE_COUNT		0x00010000
+#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT	0x00080000
+#define PNV_CORE_IDLE_WINKLE_COUNT_BITS		0x000F0000
+#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT	8
+#define PNV_CORE_IDLE_THREAD_WINKLE_BITS	0x0000FF00
+
+#define PNV_CORE_IDLE_THREAD_BITS       	0x000000FF
 
 /*
  * ============================ NOTE =================================
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index b085510d697f..53fb71df3393 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -210,15 +210,20 @@ pnv_enter_arch207_idle_mode:
 	/* Sleep or winkle */
 	lbz	r7,PACA_THREAD_MASK(r13)
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+	li	r5,0
+	beq	cr3,3f
+	lis	r5,PNV_CORE_IDLE_WINKLE_COUNT@h
+3:
 lwarx_loop1:
 	lwarx	r15,0,r14
 
 	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	bnel-	core_idle_lock_held
 
+	add	r15,r15,r5			/* Add if winkle */
 	andc	r15,r15,r7			/* Clear thread bit */
 
-	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
 
 /*
  * If cr0 = 0, then current thread is the last thread of the core entering
@@ -440,14 +445,13 @@ pnv_restore_hyp_resource_arch300:
 pnv_restore_hyp_resource_arch207:
 	/*
 	 * POWER ISA 2.07 or less.
-	 * Check if we slept with winkle.
+	 * Check if we slept with sleep or winkle.
 	 */
 	ld	r2,PACATOC(r13);
 
-	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi   cr2,r0,PNV_THREAD_NAP
-	cmpwi   cr4,r0,PNV_THREAD_WINKLE
-	bgt     cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
+	lbz	r4,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr2,r4,PNV_THREAD_NAP
+	bgt	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
 
 	/*
 	 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
@@ -466,7 +470,12 @@ pnv_restore_hyp_resource_arch207:
  *
  * r13 - PACA
  * cr3 - gt if waking up with partial/complete hypervisor state loss
+ *
+ * If ISA300:
  * cr4 - gt or eq if waking up from complete hypervisor state loss.
+ *
+ * If ISA207:
+ * r4 - PACA_THREAD_IDLE_STATE
  */
 pnv_wakeup_tb_loss:
 	ld	r1,PACAR1(r13)
@@ -494,6 +503,7 @@ pnv_wakeup_tb_loss:
 	 * required to return back to caller after hypervisor state restore is
 	 * complete.
 	 */
+	mr	r18,r4
 	mflr	r17
 	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
@@ -529,10 +539,75 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	 * At this stage
 	 * cr2 - eq if first thread to wakeup in core
 	 * cr3-  gt if waking up with partial/complete hypervisor state loss
+	 * ISA300:
 	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
 	 */
 
 BEGIN_FTR_SECTION
+	/*
+	 * Were we in winkle?
+	 * If yes, check if all threads were in winkle, decrement our
+	 * winkle count, set all thread winkle bits if all were in winkle.
+	 * Check if our thread has a winkle bit set, and set cr4 accordingly
+	 * (to match ISA300, above). Pseudo-code for core idle state
+	 * transitions for ISA207 is as follows (everything happens atomically
+	 * due to store conditional and/or lock bit):
+	 *
+	 * nap_idle() { }
+	 * nap_wake() { }
+	 *
+	 * sleep_idle()
+	 * {
+	 *	core_idle_state &= ~thread_in_core
+	 * }
+	 *
+	 * sleep_wake()
+	 * {
+	 *     bool first_in_core, first_in_subcore;
+	 *
+	 *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
+	 *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
+	 *
+	 *     core_idle_state |= thread_in_core;
+	 * }
+	 *
+	 * winkle_idle()
+	 * {
+	 *	core_idle_state &= ~thread_in_core;
+	 *	core_idle_state += 1 << WINKLE_COUNT_SHIFT;
+	 * }
+	 *
+	 * winkle_wake()
+	 * {
+	 *     bool first_in_core, first_in_subcore, winkle_state_lost;
+	 *
+	 *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
+	 *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
+	 *
+	 *     core_idle_state |= thread_in_core;
+	 *
+	 *     if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SHIFT))
+	 *         core_idle_state |= THREAD_WINKLE_BITS;
+	 *     winkle_state_lost = core_idle_state &
+	 *				(thread_in_core << WINKLE_THREAD_SHIFT);
+	 *     core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
+	 * }
+	 *
+	 */
+	cmpwi	r18,PNV_THREAD_WINKLE
+	bne	2f
+	andis.	r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
+	subis	r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
+	beq	2f
+	ori	r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
+2:
+	/* Shift thread bit to winkle mask, then test if this thread is set,
+	 * and remove it from the winkle bits */
+	slwi	r8,r7,8
+	and	r8,r8,r15
+	andc	r15,r15,r8
+	cmpwi	cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
+
 	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
 	and	r4,r4,r15
 	cmpwi	r4,0	/* Check if first in subcore */
-- 
2.11.0


* Re: [PATCH 7/7] powerpc/64s: idle POWER8 avoid full state loss recovery where possible
From: Gautham R Shenoy @ 2017-03-20 10:11 UTC
  To: Nicholas Piggin
  Cc: linuxppc-dev, Michael Ellerman, Gautham R . Shenoy,
	Mahesh Jagannath Salgaonkar

Hi Nick,

On Mon, Mar 20, 2017 at 04:01:52PM +1000, Nicholas Piggin wrote:
> If not all threads were in winkle, full state loss recovery is not
> necessary and can be avoided. A previous patch removed this optimisation
> due to some complexity with the implementation. Re-implement it by
> counting the number of threads in winkle with the per-core idle state.
> Only restore full state loss if all threads were in winkle.
> 
> This has a small window of false positives right before threads execute
> winkle and just after they wake up, when the winkle count does not
> reflect the true number of threads in winkle. This is not a significant
> problem in comparison with even the minimum winkle duration. For
> correctness, a false positive is not a problem (only false negatives
> would be).

The patch looks good. Just a minor comment.


>  BEGIN_FTR_SECTION
> +	/*
> +	 * Were we in winkle?
> +	 * If yes, check if all threads were in winkle, decrement our
> +	 * winkle count, set all thread winkle bits if all were in winkle.
> +	 * Check if our thread has a winkle bit set, and set cr4 accordingly
> +	 * (to match ISA300, above). Pseudo-code for core idle state
> +	 * transitions for ISA207 is as follows (everything happens atomically
> +	 * due to store conditional and/or lock bit):
> +	 *
> +	 * nap_idle() { }
> +	 * nap_wake() { }
> +	 *
> +	 * sleep_idle()
> +	 * {
> +	 *	core_idle_state &= ~thread_in_core
> +	 * }
> +	 *
> +	 * sleep_wake()
> +	 * {
> +	 *     bool first_in_core, first_in_subcore;
> +	 *
> +	 *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
> +	 *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
> +	 *
> +	 *     core_idle_state |= thread_in_core;
> +	 * }
> +	 *
> +	 * winkle_idle()
> +	 * {
> +	 *	core_idle_state &= ~thread_in_core;
> +	 *	core_idle_state += 1 << WINKLE_COUNT_SHIFT;
> +	 * }
> +	 *
> +	 * winkle_wake()
> +	 * {
> +	 *     bool first_in_core, first_in_subcore, winkle_state_lost;
> +	 *
> +	 *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
> +	 *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
> +	 *
> +	 *     core_idle_state |= thread_in_core;
> +	 *
> +	 *     if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SHIFT))
> +	 *         core_idle_state |= THREAD_WINKLE_BITS;

We also do the following decrement. I forgot this in the pseudo-code in my
earlier reply.

	core_idle_state -= 1 << WINKLE_COUNT_SHIFT;

> +	 *     winkle_state_lost = core_idle_state &
> +	 *				(thread_in_core << WINKLE_THREAD_SHIFT);
> +	 *     core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
> +	 * }
> +	 *
> +	 */
> +	cmpwi	r18,PNV_THREAD_WINKLE
> +	bne	2f
> +	andis.	r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
> +	subis	r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
> +	beq	2f
> +	ori	r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
> +2:
> +	/* Shift thread bit to winkle mask, then test if this thread is set,
> +	 * and remove it from the winkle bits */
> +	slwi	r8,r7,8
> +	and	r8,r8,r15
> +	andc	r15,r15,r8
> +	cmpwi	cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
> +

Looks good other wise.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>


>  	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
>  	and	r4,r4,r15
>  	cmpwi	r4,0	/* Check if first in subcore */
> -- 
> 2.11.0
> 


* Re: [PATCH 7/7] powerpc/64s: idle POWER8 avoid full state loss recovery where possible
From: Nicholas Piggin @ 2017-03-20 10:26 UTC
  To: Gautham R Shenoy
  Cc: linuxppc-dev, Michael Ellerman, Mahesh Jagannath Salgaonkar

On Mon, 20 Mar 2017 15:41:39 +0530
Gautham R Shenoy <ego@linux.vnet.ibm.com> wrote:

> Hi Nick,
> 
> On Mon, Mar 20, 2017 at 04:01:52PM +1000, Nicholas Piggin wrote:
> > If not all threads were in winkle, full state loss recovery is not
> > necessary and can be avoided. A previous patch removed this optimisation
> > due to some complexity with the implementation. Re-implement it by
> > counting the number of threads in winkle with the per-core idle state.
> > Only restore full state loss if all threads were in winkle.
> > 
> > This has a small window of false positives right before threads execute
> > winkle and just after they wake up, when the winkle count does not
> > reflect the true number of threads in winkle. This is not a significant
> > problem in comparison with even the minimum winkle duration. For
> > correctness, a false positive is not a problem (only false negatives
> > would be).  
> 
> The patch looks good. Just a minor comment.
> 
> 
> >  BEGIN_FTR_SECTION
> > +	/*
> > +	 * Were we in winkle?
> > +	 * If yes, check if all threads were in winkle, decrement our
> > +	 * winkle count, set all thread winkle bits if all were in winkle.
> > +	 * Check if our thread has a winkle bit set, and set cr4 accordingly
> > +	 * (to match ISA300, above). Pseudo-code for core idle state
> > +	 * transitions for ISA207 is as follows (everything happens atomically
> > +	 * due to store conditional and/or lock bit):
> > +	 *
> > +	 * nap_idle() { }
> > +	 * nap_wake() { }
> > +	 *
> > +	 * sleep_idle()
> > +	 * {
> > +	 *	core_idle_state &= ~thread_in_core
> > +	 * }
> > +	 *
> > +	 * sleep_wake()
> > +	 * {
> > +	 *     bool first_in_core, first_in_subcore;
> > +	 *
> > +	 *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
> > +	 *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
> > +	 *
> > +	 *     core_idle_state |= thread_in_core;
> > +	 * }
> > +	 *
> > +	 * winkle_idle()
> > +	 * {
> > +	 *	core_idle_state &= ~thread_in_core;
> > +	 *	core_idle_state += 1 << WINKLE_COUNT_SHIFT;
> > +	 * }
> > +	 *
> > +	 * winkle_wake()
> > +	 * {
> > +	 *     bool first_in_core, first_in_subcore, winkle_state_lost;
> > +	 *
> > +	 *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
> > +	 *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
> > +	 *
> > +	 *     core_idle_state |= thread_in_core;
> > +	 *
> > +	 *     if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SHIFT))
> > +	 *         core_idle_state |= THREAD_WINKLE_BITS;  
> 
> We also do the following decrement. I forgot this in the pseudo-code in my
> earlier reply.
> 
>   	 	core_idle_state -= 1 << WINKLE_COUNT_SHIFT;

Lucky somebody is paying attention. Yes, this is needed. I won't resend
a patch if mpe can make the change.

 
> Looks good other wise.
> 
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

Thank you.

What do you want to do about your DD1 fix? I think there are some minor
clashes between them. I'm happy to rebase on top of yours if you prefer
it to go in first.

Thanks,
Nick


* Re: [PATCH 4/7] powerpc/64s: fix POWER9 machine check handler from stop state
From: Nicholas Piggin @ 2017-03-20 14:51 UTC
  To: linuxppc-dev
  Cc: Michael Ellerman, Gautham R . Shenoy, Mahesh Jagannath Salgaonkar

On Mon, 20 Mar 2017 16:01:49 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> The ISA specifies power save wakeup can cause a machine check interrupt.
> The machine check handler currently has code to handle that for POWER8,
> but POWER9 crashes when trying to execute the P8 style sleep
> instructions.
> 
> So queue up the machine check, then call into the idle code to wake up
> as the system reset interrupt does, rather than attempting to sleep
> again without going through the main idle path.
> 
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Hmm, I sent the wrong patch here. Not having a good day. Sorry to waste
your time :( (It works fine, but it was setting NAPSTATELOST, which I
since decided we don't need, and had an incomplete comment change.)

Here's a patch with the comment explaining why nap state is not lost,
and a bit more code moved into idle_book3s.S -- now exceptions-64s.S does
not know anything about idle wakeups except SRR1[46:47].

BTW, I've tested this in mambo including nested non-idle MCEs inside the
idle-wakeup MCE at various points. I will tidy up and submit the mambo
script I've been using for MCE injection to skiboot tomorrow.

Thanks,
Nick


The ISA specifies power save wakeup can cause a machine check interrupt.
The machine check handler currently has code to handle that for POWER8,
but POWER9 crashes when trying to execute the P8 style sleep
instructions.

So queue up the machine check, then call into the idle code to wake up
as the system reset interrupt does, rather than attempting to sleep
again without going through the main idle path.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/reg.h       |  1 +
 arch/powerpc/kernel/exceptions-64s.S | 73 +++++++++++++++++++-----------------
 arch/powerpc/kernel/idle_book3s.S    | 25 ++++++++++++
 3 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fc879fd6bdae..8bbdfacce970 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -656,6 +656,7 @@
 #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
 #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
+#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
 #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index bef9b04a3b2d..e9d4be08813a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -306,6 +306,37 @@ EXC_COMMON_BEGIN(machine_check_common)
 	/* restore original r1. */			\
 	ld	r1,GPR1(r1)
 
+#ifdef CONFIG_PPC_P7_NAP
+/*
+ * This is an idle wakeup. Low level machine check has already been
+ * done. Queue the event then call the idle code to do the wake up.
+ */
+EXC_COMMON_BEGIN(machine_check_idle_common)
+	bl	machine_check_queue_event
+
+	/*
+	 * We have not used any non-volatile GPRs here, and as a rule
+	 * most exception code including machine check does not.
+	 * Therefore PACA_NAPSTATELOST does not need to be set. Idle
+	 * wakeup will restore volatile registers.
+	 *
+	 * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce.
+	 *
+	 * Then decrement MCE nesting after finishing with the stack.
+	 */
+	ld	r3,_MSR(r1)
+
+	lhz	r11,PACA_IN_MCE(r13)
+	subi	r11,r11,1
+	sth	r11,PACA_IN_MCE(r13)
+
+	/* Turn off the RI bit because SRR1 is used by idle wakeup code. */
+	/* Recoverability could be improved by reducing the use of SRR1. */
+	li	r11,0
+	mtmsrd	r11,1
+
+	b	pnv_powersave_wakeup_mce
+#endif
 	/*
 	 * Handle machine check early in real mode. We come here with
 	 * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
@@ -318,6 +349,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
+
 #ifdef	CONFIG_PPC_P7_NAP
 	/*
 	 * Check if thread was in power saving mode. We come here when any
@@ -328,43 +360,14 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 *
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
-	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn't */
-	/* Thread was in power saving mode. Go back to nap again. */
-	cmpwi	r11,2
-	blt	3f
-	/* Supervisor/Hypervisor state loss */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-3:	bl	machine_check_queue_event
-	MACHINE_CHECK_HANDLER_WINDUP
-	GET_PACA(r13)
-	ld	r1,PACAR1(r13)
-	/*
-	 * Check what idle state this CPU was in and go back to same mode
-	 * again.
-	 */
-	lbz	r3,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	r3,PNV_THREAD_NAP
-	bgt	10f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
-	/* No return */
-10:
-	cmpwi	r3,PNV_THREAD_SLEEP
-	bgt	2f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-	/* No return */
-
-2:
-	/*
-	 * Go back to winkle. Please note that this thread was woken up in
-	 * machine check from winkle and have not restored the per-subcore
-	 * state.
-	 */
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-	/* No return */
+	BEGIN_FTR_SECTION
+	rlwinm.	r11,r12,47-31,30,31
+	beq-	4f
+	BRANCH_TO_COMMON(r10, machine_check_idle_common)
 4:
+	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
+
 	/*
 	 * Check if we are coming from hypervisor userspace. If yes then we
 	 * continue in host kernel in V mode to deliver the MC event.
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 819e64104469..02ec876423cd 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -378,6 +378,31 @@ _GLOBAL(power9_idle_stop)
 	/* No return */
 
 /*
+ * Called from machine check handler for powersave wakeups.
+ * Low level machine check processing has already been done. Now just
+ * go through the wake up path to get everything in order.
+ *
+ * r3 - The original SRR1 value.
+ * Original SRR[01] have been clobbered.
+ * MSR_RI is clear.
+ */
+.global pnv_powersave_wakeup_mce
+pnv_powersave_wakeup_mce:
+	/* Set cr3 for pnv_powersave_wakeup */
+	rlwinm	r11,r3,47-31,30,31
+	cmpwi	cr3,r11,2
+
+	/*
+	 * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
+	 * reason into SRR1, which allows reuse of the system reset wakeup
+	 * code without being mistaken for another type of wakeup.
+	 */
+	oris	r3,r3,SRR1_WAKEMCE_RESVD@h
+	mtspr	SPRN_SRR1,r3
+
+	b	pnv_powersave_wakeup
+
+/*
  * Called from reset vector for powersave wakeups.
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
-- 
2.11.0


* Re: [PATCH 7/7] powerpc/64s: idle POWER8 avoid full state loss recovery where possible
From: Gautham R Shenoy @ 2017-03-20 16:31 UTC
  To: Nicholas Piggin
  Cc: Gautham R Shenoy, linuxppc-dev, Michael Ellerman,
	Mahesh Jagannath Salgaonkar

Hi Nick,

On Mon, Mar 20, 2017 at 08:26:05PM +1000, Nicholas Piggin wrote:
> On Mon, 20 Mar 2017 15:41:39 +0530
> Gautham R Shenoy <ego@linux.vnet.ibm.com> wrote:
> 
> > Hi Nick,
> > 
> > On Mon, Mar 20, 2017 at 04:01:52PM +1000, Nicholas Piggin wrote:
> > > If not all threads were in winkle, full state loss recovery is not
> > > necessary and can be avoided. A previous patch removed this optimisation
> > > due to some complexity with the implementation. Re-implement it by
> > > counting the number of threads in winkle with the per-core idle state.
> > > Only restore full state loss if all threads were in winkle.
> > > 
> > > This has a small window of false positives right before threads execute
> > > winkle and just after they wake up, when the winkle count does not
> > > reflect the true number of threads in winkle. This is not a significant
> > > problem in comparison with even the minimum winkle duration. For
> > > correctness, a false positive is not a problem (only false negatives
> > > would be).  
> > 
> > The patch looks good. Just a minor comment.
> > 
> > 
> > >  BEGIN_FTR_SECTION
> > > +	/*
> > > +	 * Were we in winkle?
> > > +	 * If yes, check if all threads were in winkle, decrement our
> > > +	 * winkle count, set all thread winkle bits if all were in winkle.
> > > +	 * Check if our thread has a winkle bit set, and set cr4 accordingly
> > > +	 * (to match ISA300, above). Pseudo-code for core idle state
> > > +	 * transitions for ISA207 is as follows (everything happens atomically
> > > +	 * due to store conditional and/or lock bit):
> > > +	 *
> > > +	 * nap_idle() { }
> > > +	 * nap_wake() { }
> > > +	 *
> > > +	 * sleep_idle()
> > > +	 * {
> > > +	 *	core_idle_state &= ~thread_in_core
> > > +	 * }
> > > +	 *
> > > +	 * sleep_wake()
> > > +	 * {
> > > +	 *     bool first_in_core, first_in_subcore;
> > > +	 *
> > > +	 *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
> > > +	 *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
> > > +	 *
> > > +	 *     core_idle_state |= thread_in_core;
> > > +	 * }
> > > +	 *
> > > +	 * winkle_idle()
> > > +	 * {
> > > +	 *	core_idle_state &= ~thread_in_core;
> > > +	 *	core_idle_state += 1 << WINKLE_COUNT_SHIFT;
> > > +	 * }
> > > +	 *
> > > +	 * winkle_wake()
> > > +	 * {
> > > +	 *     bool first_in_core, first_in_subcore, winkle_state_lost;
> > > +	 *
> > > +	 *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
> > > +	 *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
> > > +	 *
> > > +	 *     core_idle_state |= thread_in_core;
> > > +	 *
> > > +	 *     if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SHIFT))
> > > +	 *         core_idle_state |= THREAD_WINKLE_BITS;  
> > 
> > We also do the following decrement. I forgot this in the pseudo-code in my
> > earlier reply.
> > 
> >   	 	core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
> 
> Lucky somebody is paying attention. Yes, this is needed. I won't resend
> a patch if mpe can make the change.
> 
> 
> > Looks good other wise.
> > 
> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> 
> Thank you.
> 
> What do you want to do about your DD1 fix? I think there are some minor
> clashes between them. I'm happy to rebase on top of yours if you prefer
> it to go in first.

I have sent an updated version of the DD1 fix today rebasing on
v4.11-rc3.

I applied your series on top of that and noticed some minor conflict
with patches 1,2,3 and 7.

If you are ok with it, I would like the DD1 Hotplug fixes to go in
first.

> 
> Thanks,
> Nick
> 


* Re: [PATCH 7/7] powerpc/64s: idle POWER8 avoid full state loss recovery where possible
From: Nicholas Piggin @ 2017-03-20 17:19 UTC
  To: Gautham R Shenoy
  Cc: linuxppc-dev, Michael Ellerman, Mahesh Jagannath Salgaonkar

On Mon, 20 Mar 2017 22:01:15 +0530
Gautham R Shenoy <ego@linux.vnet.ibm.com> wrote:

> > What do you want to do about your DD1 fix? I think there are some minor
> > clashes between them. I'm happy to rebase on top of yours if you prefer
> > it to go in first.  
> 
> I have sent an updated version of the DD1 fix today rebasing on
> v4.11-rc3.
> 
> I applied your series on top of that and noticed some minor conflict
> with patches 1,2,3 and 7.
> 
> If you are ok with it, I would like the DD1 Hotplug fixes to go in
> first.

This would be fine with me. Your clashing hunk is pretty trivial to fix
on top of my series though (it actually becomes nicer):

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 3330a4293d1a..6d8e022da183 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -414,6 +414,9 @@ pnv_powersave_wakeup_mce:
 .global pnv_powersave_wakeup
 pnv_powersave_wakeup:
 BEGIN_FTR_SECTION
+BEGIN_FTR_SECTION_NESTED(70)
+       bl      power9_dd1_recover_paca
+END_FTR_SECTION_NESTED(CPU_FTR_POWER9_DD1, CPU_FTR_POWER9_DD1, 70)
        bl      pnv_restore_hyp_resource_arch300
 FTR_SECTION_ELSE
        bl      pnv_restore_hyp_resource_arch207

So we could easily do it the other way around too. The question is, when will
they get merged, and what needs to be backported. I'll let you and mpe decide
that :)

Thanks,
Nick

