All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/8] idle fixes and changes for POWER8 and POWER9
@ 2017-03-14  9:23 Nicholas Piggin
  2017-03-14  9:23 ` [PATCH 1/8] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
                   ` (7 more replies)
  0 siblings, 8 replies; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

Hi,

This is a resend of the previous idle series, with the addition that
I accounted for Gautham's feedback, and also re-introduced the feature
to avoid full state restore by counting winkles rather than special
HSPRG0 bit.

The two big things we get from this, is no longer messing with HSPRG0
in the idle wakeup path on POWER8, and not crashing on POWER9 machine
check from power saving mode (tested in mambo according to ISA specs,
but have not triggered it on real hardware).

I only added the reviewed-by for patches which were not significantly
changed since last time.

Thanks,
Nick

Nicholas Piggin (8):
  powerpc/64s: move remaining system reset idle code into idle_book3s.S
  powerpc/64s: stop using bit in HSPRG0 to test winkle
  powerpc/64s: use alternative feature patching
  powerpc/64s: fix POWER9 machine check handler from stop state
  powerpc/64s: use PACA_THREAD_IDLE_STATE only in POWER8
  powerpc/64s: idle expand usable core idle state bits
  powerpc/64s: idle do not hold reservation longer than required
  powerpc/64s: idle POWER8 avoid full state loss recovery when possible

 arch/powerpc/include/asm/cpuidle.h       |  32 +++++-
 arch/powerpc/include/asm/exception-64s.h |  13 +--
 arch/powerpc/include/asm/paca.h          |  12 +-
 arch/powerpc/include/asm/reg.h           |   1 +
 arch/powerpc/kernel/exceptions-64s.S     | 112 ++++++------------
 arch/powerpc/kernel/idle_book3s.S        | 189 ++++++++++++++++++++-----------
 arch/powerpc/platforms/powernv/idle.c    |  13 ---
 7 files changed, 202 insertions(+), 170 deletions(-)

-- 
2.11.0

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/8] powerpc/64s: move remaining system reset idle code into idle_book3s.S
  2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
@ 2017-03-14  9:23 ` Nicholas Piggin
  2017-03-14  9:23 ` [PATCH 2/8] powerpc/64s: stop using bit in HSPRG0 to test winkle Nicholas Piggin
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

Should be no functional change.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 26 +-----------
 arch/powerpc/kernel/idle_book3s.S    | 76 ++++++++++++++++++++++++------------
 2 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 857bf7c5b946..2f837a4a78a2 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -126,31 +126,7 @@ EXC_VIRT_NONE(0x4100, 0x100)
 
 #ifdef CONFIG_PPC_P7_NAP
 EXC_COMMON_BEGIN(system_reset_idle_common)
-BEGIN_FTR_SECTION
-	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-	bl	pnv_restore_hyp_resource
-
-	li	r0,PNV_THREAD_RUNNING
-	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	BRANCH_TO_KVM(r10, kvm_start_guest)
-1:
-#endif
-
-	/* Return SRR1 from power7_nap() */
-	mfspr	r3,SPRN_SRR1
-	blt	cr3,2f
-	b	pnv_wakeup_loss
-2:	b	pnv_wakeup_noloss
+	b	pnv_powersave_wakeup
 #endif
 
 EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 995728736677..4313c107da5d 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -20,6 +20,7 @@
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
 #include <asm/cpuidle.h>
+#include <asm/exception-64s.h>
 #include <asm/book3s/64/mmu-hash.h>
 #include <asm/mmu.h>
 
@@ -113,7 +114,7 @@ core_idle_lock_held:
  *
  * Address to 'rfid' to in r5
  */
-_GLOBAL(pnv_powersave_common)
+pnv_powersave_common:
 	/* Use r3 to pass state nap/sleep/winkle */
 	/* NAP is a state loss, we create a regs frame on the
 	 * stack, fill it up with the state we care about and
@@ -188,8 +189,8 @@ pnv_enter_arch207_idle_mode:
 	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
 	/* MUST occur in real mode, i.e. with the MMU off,    */
 	/* and the MMU must stay off until we clear this flag */
-	/* and test HSTATE_HWTHREAD_REQ(r13) in the system    */
-	/* reset interrupt vector in exceptions-64s.S.        */
+	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
+	/* pnv_powersave_wakeup in this file.                 */
 	/* The reason is that another thread can switch the   */
 	/* MMU to a guest context whenever this flag is set   */
 	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
@@ -375,15 +376,42 @@ _GLOBAL(power9_idle_stop)
 	li	r4,1
 	b	pnv_powersave_common
 	/* No return */
+
+.global pnv_powersave_wakeup
+pnv_powersave_wakeup:
+BEGIN_FTR_SECTION
+	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+	bl	pnv_restore_hyp_resource
+
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li	r0,KVM_HWTHREAD_IN_KERNEL
+	stb	r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi	r0,0
+	beq	1f
+	BRANCH_TO_KVM(r10, kvm_start_guest)
+1:
+#endif
+
+	/* Return SRR1 from power7_nap() */
+	mfspr	r3,SPRN_SRR1
+	blt	cr3,pnv_wakeup_noloss
+	b	pnv_wakeup_loss
+
 /*
- * Called from reset vector. Check whether we have woken up with
- * hypervisor state loss. If yes, restore hypervisor state and return
- * back to reset vector.
+ * Check whether we have woken up with hypervisor state loss.
+ * If yes, restore hypervisor state and return back to link.
  *
  * r13 - Contents of HSPRG0
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
-_GLOBAL(pnv_restore_hyp_resource)
+pnv_restore_hyp_resource:
 BEGIN_FTR_SECTION
 	ld	r2,PACATOC(r13);
 	/*
@@ -400,12 +428,9 @@ BEGIN_FTR_SECTION
 	 */
 	rldicl  r5,r5,4,60
 	cmpd	cr4,r5,r4
-	bge	cr4,pnv_wakeup_tb_loss
-	/*
-	 * Waking up without hypervisor state loss. Return to
-	 * reset vector
-	 */
-	blr
+	bge	cr4,pnv_wakeup_tb_loss /* returns to caller */
+
+	blr	/* Waking up without hypervisor state loss. */
 
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
@@ -433,8 +458,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	 */
 	bgt	cr3,.
 
-	blr	/* Return back to System Reset vector from where
-		   pnv_restore_hyp_resource was invoked */
+	blr	/* Waking up without hypervisor state loss */
 
 /*
  * Called if waking up from idle state which can cause either partial or
@@ -446,18 +470,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  * cr3 - gt if waking up with partial/complete hypervisor state loss
  * cr4 - gt or eq if waking up from complete hypervisor state loss.
  */
-_GLOBAL(pnv_wakeup_tb_loss)
+pnv_wakeup_tb_loss:
 	ld	r1,PACAR1(r13)
 	/*
 	 * Before entering any idle state, the NVGPRs are saved in the stack
-	 * and they are restored before switching to the process context. Hence
-	 * until they are restored, they are free to be used.
+	 * and they are restored before switching to the process context.
+	 * Hence until they are restored, they are free to be used.
 	 *
 	 * Save SRR1 and LR in NVGPRs as they might be clobbered in
-	 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
-	 * to determine the wakeup reason if we branch to kvm_start_guest. LR
-	 * is required to return back to reset vector after hypervisor state
-	 * restore is complete.
+	 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required to
+	 * determine the wakeup reason if we branch to kvm_start_guest. LR is
+	 * required to return back to caller after hypervisor state restore is
+	 * complete.
 	 */
 	mflr	r17
 	mfspr	r16,SPRN_SRR1
@@ -637,8 +661,7 @@ hypervisor_state_restored:
 
 	mtspr	SPRN_SRR1,r16
 	mtlr	r17
-	blr	/* Return back to System Reset vector from where
-		   pnv_restore_hyp_resource was invoked */
+	blr		/* return to pnv_powersave_wakeup */
 
 fastsleep_workaround_at_exit:
 	li	r3,1
@@ -650,7 +673,8 @@ fastsleep_workaround_at_exit:
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
  */
-_GLOBAL(pnv_wakeup_loss)
+.global pnv_wakeup_loss
+pnv_wakeup_loss:
 	ld	r1,PACAR1(r13)
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
@@ -670,7 +694,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
  */
-_GLOBAL(pnv_wakeup_noloss)
+pnv_wakeup_noloss:
 	lbz	r0,PACA_NAPSTATELOST(r13)
 	cmpwi	r0,0
 	bne	pnv_wakeup_loss
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/8] powerpc/64s: stop using bit in HSPRG0 to test winkle
  2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
  2017-03-14  9:23 ` [PATCH 1/8] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
@ 2017-03-14  9:23 ` Nicholas Piggin
  2017-03-16 11:14   ` Gautham R Shenoy
  2017-03-14  9:23 ` [PATCH 3/8] powerpc/64s: use alternative feature patching Nicholas Piggin
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

The POWER8 idle code has a neat trick of programming the power on engine
to restore a low bit into HSPRG0, so idle wakeup code can test and see
if it has been programmed this way and therefore lost all state, and
avoid the expensive full restore if not.

However this messes with our r13 PACA pointer, and requires HSPRG0 to
be written to throughout the exception handlers and idle wakeup, rather
than just once on kernel entry.

Remove this complexity and assume winkle sleeps always require a state
restore. This speedup is later re-introduced by counting per-core winkles
and setting a bitmap of threads with state loss when all are in winkle.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/exception-64s.h | 13 ++-----------
 arch/powerpc/kernel/exceptions-64s.S     | 21 +++------------------
 arch/powerpc/kernel/idle_book3s.S        | 23 ++++++++---------------
 arch/powerpc/platforms/powernv/idle.c    | 13 -------------
 4 files changed, 13 insertions(+), 57 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 14752eee3d0c..f5fe7c901b37 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -167,17 +167,14 @@ BEGIN_FTR_SECTION_NESTED(943)						\
 	std	ra,offset(r13);						\
 END_FTR_SECTION_NESTED(ftr,ftr,943)
 
-#define EXCEPTION_PROLOG_0_PACA(area)					\
+#define EXCEPTION_PROLOG_0(area)					\
+	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 */			\
 	OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);			\
 	HMT_MEDIUM;							\
 	std	r10,area+EX_R10(r13);	/* save r10 - r12 */		\
 	OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
 
-#define EXCEPTION_PROLOG_0(area)					\
-	GET_PACA(r13);							\
-	EXCEPTION_PROLOG_0_PACA(area)
-
 #define __EXCEPTION_PROLOG_1(area, extra, vec)				\
 	OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);		\
 	OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);		\
@@ -208,12 +205,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 	EXCEPTION_PROLOG_1(area, extra, vec);				\
 	EXCEPTION_PROLOG_PSERIES_1(label, h);
 
-/* Have the PACA in r13 already */
-#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec)	\
-	EXCEPTION_PROLOG_0_PACA(area);					\
-	EXCEPTION_PROLOG_1(area, extra, vec);				\
-	EXCEPTION_PROLOG_PSERIES_1(label, h);
-
 #define __KVMTEST(h, n)							\
 	lbz	r10,HSTATE_IN_GUEST(r13);				\
 	cmpwi	r10,0;							\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2f837a4a78a2..e390fcd04bcb 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -116,9 +116,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
 
 EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
 	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	clrrdi	r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
-	EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
 				 IDLETEST, 0x100)
 
 EXC_REAL_END(system_reset, 0x100, 0x100)
@@ -148,14 +146,6 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
 	 * vector
 	 */
 	SET_SCRATCH0(r13)		/* save r13 */
-	/*
-	 * Running native on arch 2.06 or later, we may wakeup from winkle
-	 * inside machine check. If yes, then last bit of HSPRG0 would be set
-	 * to 1. Hence clear it unconditionally.
-	 */
-	GET_PACA(r13)
-	clrrdi	r13,r13,1
-	SET_PACA(r13)
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
 	b	machine_check_powernv_early
@@ -339,7 +329,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
 	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn;t */
+	beq	4f			/* No, it wasn't */
 	/* Thread was in power saving mode. Go back to nap again. */
 	cmpwi	r11,2
 	blt	3f
@@ -369,13 +359,8 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	/*
 	 * Go back to winkle. Please note that this thread was woken up in
 	 * machine check from winkle and have not restored the per-subcore
-	 * state. Hence before going back to winkle, set last bit of HSPRG0
-	 * to 1. This will make sure that if this thread gets woken up
-	 * again at reset vector 0x100 then it will get chance to restore
-	 * the subcore state.
+	 * state.
 	 */
-	ori	r13,r13,1
-	SET_PACA(r13)
 	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
 	/* No return */
 4:
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 4313c107da5d..405631b2c229 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -377,11 +377,12 @@ _GLOBAL(power9_idle_stop)
 	b	pnv_powersave_common
 	/* No return */
 
+/*
+ * Called from reset vector for powersave wakeups.
+ * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ */
 .global pnv_powersave_wakeup
 pnv_powersave_wakeup:
-BEGIN_FTR_SECTION
-	GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	bl	pnv_restore_hyp_resource
 
 	li	r0,PNV_THREAD_RUNNING
@@ -408,12 +409,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
  * Check whether we have woken up with hypervisor state loss.
  * If yes, restore hypervisor state and return back to link.
  *
- * r13 - Contents of HSPRG0
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
 pnv_restore_hyp_resource:
-BEGIN_FTR_SECTION
 	ld	r2,PACATOC(r13);
+
+BEGIN_FTR_SECTION
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
 	 * are waking up from deep idle state
@@ -436,19 +437,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	/*
 	 * POWER ISA 2.07 or less.
-	 * Check if last bit of HSPGR0 is set. This indicates whether we are
-	 * waking up from winkle.
+	 * Check if we slept with winkle.
 	 */
-	clrldi	r5,r13,63
-	clrrdi	r13,r13,1
-
-	/* Now that we are sure r13 is corrected, load TOC */
-	ld	r2,PACATOC(r13);
-	cmpwi	cr4,r5,1
-	mtspr	SPRN_HSPRG0,r13
-
 	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
 	cmpwi   cr2,r0,PNV_THREAD_NAP
+	cmpwi   cr4,r0,PNV_THREAD_WINKLE
 	bgt     cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
 
 	/*
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 4ee837e6391a..f9fe0ff0f03a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -53,19 +53,6 @@ static int pnv_save_sprs_for_deep_states(void)
 		uint64_t pir = get_hard_smp_processor_id(cpu);
 		uint64_t hsprg0_val = (uint64_t)&paca[cpu];
 
-		if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
-			/*
-			 * HSPRG0 is used to store the cpu's pointer to paca.
-			 * Hence last 3 bits are guaranteed to be 0. Program
-			 * slw to restore HSPRG0 with 63rd bit set, so that
-			 * when a thread wakes up at 0x100 we can use this bit
-			 * to distinguish between fastsleep and deep winkle.
-			 * This is not necessary with stop/psscr since PLS
-			 * field of psscr indicates which state we are waking
-			 * up from.
-			 */
-			hsprg0_val |= 1;
-		}
 		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
 		if (rc != 0)
 			return rc;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/8] powerpc/64s: use alternative feature patching
  2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
  2017-03-14  9:23 ` [PATCH 1/8] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
  2017-03-14  9:23 ` [PATCH 2/8] powerpc/64s: stop using bit in HSPRG0 to test winkle Nicholas Piggin
@ 2017-03-14  9:23 ` Nicholas Piggin
  2017-03-16 11:21   ` Gautham R Shenoy
  2017-03-14  9:23 ` [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state Nicholas Piggin
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

This reduces the number of nops for POWER8.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/idle_book3s.S | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 405631b2c229..9284ea0762b1 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -383,7 +383,11 @@ _GLOBAL(power9_idle_stop)
  */
 .global pnv_powersave_wakeup
 pnv_powersave_wakeup:
-	bl	pnv_restore_hyp_resource
+BEGIN_FTR_SECTION
+	bl	pnv_restore_hyp_resource_arch300
+FTR_SECTION_ELSE
+	bl	pnv_restore_hyp_resource_arch207
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 
 	li	r0,PNV_THREAD_RUNNING
 	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
@@ -411,14 +415,13 @@ pnv_powersave_wakeup:
  *
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
-pnv_restore_hyp_resource:
-	ld	r2,PACATOC(r13);
-
-BEGIN_FTR_SECTION
+pnv_restore_hyp_resource_arch300:
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
 	 * are waking up from deep idle state
 	 */
+	ld	r2,PACATOC(r13);
+
 	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
 	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
 
@@ -433,12 +436,14 @@ BEGIN_FTR_SECTION
 
 	blr	/* Waking up without hypervisor state loss. */
 
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
+/* Same calling convention as arch300 */
+pnv_restore_hyp_resource_arch207:
 	/*
 	 * POWER ISA 2.07 or less.
 	 * Check if we slept with winkle.
 	 */
+	ld	r2,PACATOC(r13);
+
 	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
 	cmpwi   cr2,r0,PNV_THREAD_NAP
 	cmpwi   cr4,r0,PNV_THREAD_WINKLE
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
                   ` (2 preceding siblings ...)
  2017-03-14  9:23 ` [PATCH 3/8] powerpc/64s: use alternative feature patching Nicholas Piggin
@ 2017-03-14  9:23 ` Nicholas Piggin
  2017-03-16 12:40   ` Mahesh Jagannath Salgaonkar
  2017-03-14  9:23 ` [PATCH 5/8] powerpc/64s: use PACA_THREAD_IDLE_STATE only in POWER8 Nicholas Piggin
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

The ISA specifies power save wakeup can cause a machine check interrupt.
The machine check handler currently has code to handle that for POWER8,
but POWER9 crashes when trying to execute the P8 style sleep
instructions.

So queue up the machine check, then call into the idle code to wake up
as the system reset interrupt does, rather than attempting to sleep
again without going through the main idle path.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/reg.h       |  1 +
 arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fc879fd6bdae..8bbdfacce970 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -656,6 +656,7 @@
 #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
 #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
+#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
 #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e390fcd04bcb..5779d2d6a192 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
 	/* restore original r1. */			\
 	ld	r1,GPR1(r1)
 
+#ifdef CONFIG_PPC_P7_NAP
+EXC_COMMON_BEGIN(machine_check_idle_common)
+	bl	machine_check_queue_event
+	/*
+	 * Queue the machine check, then reload SRR1 and use it to set
+	 * CR3 according to pnv_powersave_wakeup convention.
+	 */
+	ld	r12,_MSR(r1)
+	rlwinm	r11,r12,47-31,30,31
+	cmpwi	cr3,r11,2
+
+	/*
+	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
+	 * system reset wakeup code.
+	 */
+	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
+	mtspr	SPRN_SRR1,r12
+	std	r12,_MSR(r1)
+
+	/*
+	 * Decrement MCE nesting after finishing with the stack.
+	 */
+	lhz	r11,PACA_IN_MCE(r13)
+	subi	r11,r11,1
+	sth	r11,PACA_IN_MCE(r13)
+	b	pnv_powersave_wakeup
+#endif
 	/*
 	 * Handle machine check early in real mode. We come here with
 	 * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
@@ -318,6 +345,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
+
 #ifdef	CONFIG_PPC_P7_NAP
 	/*
 	 * Check if thread was in power saving mode. We come here when any
@@ -328,43 +356,14 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
 	 *
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
-	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn't */
-	/* Thread was in power saving mode. Go back to nap again. */
-	cmpwi	r11,2
-	blt	3f
-	/* Supervisor/Hypervisor state loss */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-3:	bl	machine_check_queue_event
-	MACHINE_CHECK_HANDLER_WINDUP
-	GET_PACA(r13)
-	ld	r1,PACAR1(r13)
-	/*
-	 * Check what idle state this CPU was in and go back to same mode
-	 * again.
-	 */
-	lbz	r3,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	r3,PNV_THREAD_NAP
-	bgt	10f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
-	/* No return */
-10:
-	cmpwi	r3,PNV_THREAD_SLEEP
-	bgt	2f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-	/* No return */
-
-2:
-	/*
-	 * Go back to winkle. Please note that this thread was woken up in
-	 * machine check from winkle and have not restored the per-subcore
-	 * state.
-	 */
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-	/* No return */
+	BEGIN_FTR_SECTION
+	rlwinm.	r11,r12,47-31,30,31
+	beq-	4f
+	BRANCH_TO_COMMON(r10, machine_check_idle_common)
 4:
+	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
+
 	/*
 	 * Check if we are coming from hypervisor userspace. If yes then we
 	 * continue in host kernel in V mode to deliver the MC event.
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 5/8] powerpc/64s: use PACA_THREAD_IDLE_STATE only in POWER8
  2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
                   ` (3 preceding siblings ...)
  2017-03-14  9:23 ` [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state Nicholas Piggin
@ 2017-03-14  9:23 ` Nicholas Piggin
  2017-03-16 11:54   ` Gautham R Shenoy
  2017-03-14  9:23 ` [PATCH 6/8] powerpc/64s: idle expand usable core idle state bits Nicholas Piggin
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

POWER9 does not use this field, so it should be moved into the POWER8
code. Update the documentation in the paca struct too.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/paca.h   | 12 ++++++++++--
 arch/powerpc/kernel/idle_book3s.S | 13 +++++++------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 708c3e592eeb..bbb59e226a9f 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -165,11 +165,19 @@ struct paca_struct {
 #endif
 
 #ifdef CONFIG_PPC_POWERNV
-	/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
+	/* CPU idle fields */
+
+	/*
+	 * Per-core word used to synchronize between threads. See
+	 * asm/cpuidle.h, PNV_CORE_IDLE_*
+	 */
 	u32 *core_idle_state_ptr;
-	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
 	/* Mask to indicate thread id in core */
 	u8 thread_mask;
+
+	/* POWER8 specific fields */
+	/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
+	u8 thread_idle_state;
 	/* Mask to denote subcore sibling threads */
 	u8 subcore_sibling_mask;
 #endif
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 9284ea0762b1..9bdfba75a5e7 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -389,9 +389,6 @@ FTR_SECTION_ELSE
 	bl	pnv_restore_hyp_resource_arch207
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 
-	li	r0,PNV_THREAD_RUNNING
-	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
-
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
 	stb	r0,HSTATE_HWTHREAD_STATE(r13)
@@ -445,9 +442,13 @@ pnv_restore_hyp_resource_arch207:
 	ld	r2,PACATOC(r13);
 
 	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi   cr2,r0,PNV_THREAD_NAP
-	cmpwi   cr4,r0,PNV_THREAD_WINKLE
-	bgt     cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
+	cmpwi	cr2,r0,PNV_THREAD_NAP
+	cmpwi	cr4,r0,PNV_THREAD_WINKLE
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
+
+	bgt	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
+
 
 	/*
 	 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 6/8] powerpc/64s: idle expand usable core idle state bits
  2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
                   ` (4 preceding siblings ...)
  2017-03-14  9:23 ` [PATCH 5/8] powerpc/64s: use PACA_THREAD_IDLE_STATE only in POWER8 Nicholas Piggin
@ 2017-03-14  9:23 ` Nicholas Piggin
  2017-03-16 12:10   ` Gautham R Shenoy
  2017-03-14  9:23 ` [PATCH 7/8] powerpc/64s: idle do not hold reservation longer than required Nicholas Piggin
  2017-03-14  9:23 ` [PATCH 8/8] powerpc/64s: idle POWER8 avoid full state loss recovery when possible Nicholas Piggin
  7 siblings, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

In preparation for adding more bits to the core idle state word,
move the lock bit up, and unlock by flipping the lock bit rather
than masking off all but the thread bits.

Add branch hints for atomic operations while we're here.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/cpuidle.h |  4 ++--
 arch/powerpc/kernel/idle_book3s.S  | 33 +++++++++++++++++----------------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 155731557c9b..b9d9f960dffd 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -7,8 +7,8 @@
 #define PNV_THREAD_NAP                  1
 #define PNV_THREAD_SLEEP                2
 #define PNV_THREAD_WINKLE               3
-#define PNV_CORE_IDLE_LOCK_BIT          0x100
-#define PNV_CORE_IDLE_THREAD_BITS       0x0FF
+#define PNV_CORE_IDLE_LOCK_BIT          0x10000000
+#define PNV_CORE_IDLE_THREAD_BITS       0x000000FF
 
 /*
  * ============================ NOTE =================================
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 9bdfba75a5e7..1c91dc35c559 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -95,12 +95,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 core_idle_lock_held:
 	HMT_LOW
 3:	lwz	r15,0(r14)
-	andi.   r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	andis.	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	bne	3b
 	HMT_MEDIUM
 	lwarx	r15,0,r14
-	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bne	core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bne-	core_idle_lock_held
 	blr
 
 /*
@@ -213,8 +213,8 @@ pnv_enter_arch207_idle_mode:
 lwarx_loop1:
 	lwarx	r15,0,r14
 
-	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bnel	core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bnel-	core_idle_lock_held
 
 	andc	r15,r15,r7			/* Clear thread bit */
 
@@ -241,7 +241,7 @@ common_enter: /* common code for all the threads entering sleep or winkle */
 	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
 
 fastsleep_workaround_at_entry:
-	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	stwcx.	r15,0,r14
 	bne-	lwarx_loop1
 	isync
@@ -251,10 +251,10 @@ fastsleep_workaround_at_entry:
 	li	r4,1
 	bl	opal_config_cpu_idle_state
 
-	/* Clear Lock bit */
-	li	r0,0
+	/* Unlock */
+	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	lwsync
-	stw	r0,0(r14)
+	stw	r15,0(r14)
 	b	common_enter
 
 enter_winkle:
@@ -302,8 +302,8 @@ power_enter_stop:
 
 lwarx_loop_stop:
 	lwarx   r15,0,r14
-	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bnel    core_idle_lock_held
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	bnel-	core_idle_lock_held
 	andc    r15,r15,r7                      /* Clear thread bit */
 
 	stwcx.  r15,0,r14
@@ -492,7 +492,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
 lwarx_loop2:
 	lwarx	r15,0,r14
-	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	/*
 	 * Lock bit is set in one of the 2 cases-
 	 * a. In the sleep/winkle enter path, the last thread is executing
@@ -501,9 +501,10 @@ lwarx_loop2:
 	 * workaround undo code or resyncing timebase or restoring context
 	 * In either case loop until the lock bit is cleared.
 	 */
-	bnel	core_idle_lock_held
+	bnel-	core_idle_lock_held
 
-	cmpwi	cr2,r15,0
+	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
+	cmpwi	cr2,r9,0
 
 	/*
 	 * At this stage
@@ -512,7 +513,7 @@ lwarx_loop2:
 	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
 	 */
 
-	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	stwcx.	r15,0,r14
 	bne-	lwarx_loop2
 	isync
@@ -602,7 +603,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mtspr	SPRN_WORC,r4
 
 clear_lock:
-	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	lwsync
 	stw	r15,0(r14)
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 7/8] powerpc/64s: idle do not hold reservation longer than required
  2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
                   ` (5 preceding siblings ...)
  2017-03-14  9:23 ` [PATCH 6/8] powerpc/64s: idle expand usable core idle state bits Nicholas Piggin
@ 2017-03-14  9:23 ` Nicholas Piggin
  2017-03-16 12:43   ` Gautham R Shenoy
  2017-03-14  9:23 ` [PATCH 8/8] powerpc/64s: idle POWER8 avoid full state loss recovery when possible Nicholas Piggin
  7 siblings, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

When taking the core idle state lock, grab it immediately like a
regular lock, rather than adding more tests in there. Holding the lock
keeps it stable, so there is no need to do it while holding the
reservation.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/idle_book3s.S | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 1c91dc35c559..3cb75907c5c5 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -488,12 +488,12 @@ BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 
-	lbz	r7,PACA_THREAD_MASK(r13)
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
-lwarx_loop2:
-	lwarx	r15,0,r14
-	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	lbz	r7,PACA_THREAD_MASK(r13)
+
 	/*
+	 * Take the core lock to synchronize against other threads.
+	 *
 	 * Lock bit is set in one of the 2 cases-
 	 * a. In the sleep/winkle enter path, the last thread is executing
 	 * fastsleep workaround code.
@@ -501,7 +501,14 @@ lwarx_loop2:
 	 * workaround undo code or resyncing timebase or restoring context
 	 * In either case loop until the lock bit is cleared.
 	 */
+1:
+	lwarx	r15,0,r14
+	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	bnel-	core_idle_lock_held
+	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
+	stwcx.	r15,0,r14
+	bne-	1b
+	isync
 
 	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
 	cmpwi	cr2,r9,0
@@ -513,11 +520,6 @@ lwarx_loop2:
 	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
 	 */
 
-	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
-	stwcx.	r15,0,r14
-	bne-	lwarx_loop2
-	isync
-
 BEGIN_FTR_SECTION
 	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
 	and	r4,r4,r15
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 8/8] powerpc/64s: idle POWER8 avoid full state loss recovery when possible
  2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
                   ` (6 preceding siblings ...)
  2017-03-14  9:23 ` [PATCH 7/8] powerpc/64s: idle do not hold reservation longer than required Nicholas Piggin
@ 2017-03-14  9:23 ` Nicholas Piggin
  2017-03-16 16:12   ` Gautham R Shenoy
  7 siblings, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-14  9:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin, Gautham R Shenoy, Vaidyanathan Srinivasan

If not all threads were in winkle, full state loss recovery is not
necessary and can be avoided. A previous patch removed this optimisation
due to some complexity with the implementation. Re-implement it by
counting the number of threads in winkle with the per-core idle state.
Only restore full state loss if all threads were in winkle.

This has a small window of false positives right before threads execute
winkle and just after they wake up, when the winkle count does not
reflect the true number of threads in winkle. This is not a significant
problem in comparison with even the minimum winkle duration. For
correctness, a false positive is not a problem (only false negatives
would be).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/cpuidle.h | 32 ++++++++++++++++++++++++---
 arch/powerpc/kernel/idle_book3s.S  | 45 +++++++++++++++++++++++++++++++++-----
 2 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index b9d9f960dffd..b68a5cd75ae8 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -2,13 +2,39 @@
 #define _ASM_POWERPC_CPUIDLE_H
 
 #ifdef CONFIG_PPC_POWERNV
-/* Used in powernv idle state management */
+/* Thread state used in powernv idle state management */
 #define PNV_THREAD_RUNNING              0
 #define PNV_THREAD_NAP                  1
 #define PNV_THREAD_SLEEP                2
 #define PNV_THREAD_WINKLE               3
-#define PNV_CORE_IDLE_LOCK_BIT          0x10000000
-#define PNV_CORE_IDLE_THREAD_BITS       0x000000FF
+
+/*
+ * Core state used in powernv idle for POWER8.
+ *
+ * The lock bit synchronizes updates to the state, as well as parts of the
+ * sleep/wake code (see kernel/idle_book3s.S).
+ *
+ * Bottom 8 bits track the idle state of each thread. Bit is cleared before
+ * the thread executes an idle instruction (nap/sleep/winkle).
+ *
+ * Then there is winkle tracking. A core does not lose complete state
+ * until every thread is in winkle. So the winkle count field counts the
+ * number of threads in winkle (small window of false positives is okay
+ * around the sleep/wake, so long as there are no false negatives).
+ *
+ * When the winkle count reaches 8 (the COUNT_ALL_BIT becomes set), then
+ * the THREAD_WINKLE_BITS are set, which indicate which threads have not
+ * yet woken from the winkle state.
+ */
+#define PNV_CORE_IDLE_LOCK_BIT			0x10000000
+
+#define PNV_CORE_IDLE_WINKLE_COUNT		0x00010000
+#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT	0x00080000
+#define PNV_CORE_IDLE_WINKLE_COUNT_BITS		0x000F0000
+#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT	8
+#define PNV_CORE_IDLE_THREAD_WINKLE_BITS	0x0000FF00
+
+#define PNV_CORE_IDLE_THREAD_BITS       	0x000000FF
 
 /*
  * ============================ NOTE =================================
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 3cb75907c5c5..87518a1dca50 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -210,15 +210,20 @@ pnv_enter_arch207_idle_mode:
 	/* Sleep or winkle */
 	lbz	r7,PACA_THREAD_MASK(r13)
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+	li	r5,0
+	beq	cr3,3f
+	lis	r5,PNV_CORE_IDLE_WINKLE_COUNT@h
+3:
 lwarx_loop1:
 	lwarx	r15,0,r14
 
 	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
 	bnel-	core_idle_lock_held
 
+	add	r15,r15,r5			/* Add if winkle */
 	andc	r15,r15,r7			/* Clear thread bit */
 
-	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
 
 /*
  * If cr0 = 0, then current thread is the last thread of the core entering
@@ -437,16 +442,14 @@ pnv_restore_hyp_resource_arch300:
 pnv_restore_hyp_resource_arch207:
 	/*
 	 * POWER ISA 2.07 or less.
-	 * Check if we slept with winkle.
+	 * Check if we slept with sleep or winkle.
 	 */
 	ld	r2,PACATOC(r13);
 
-	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	cr2,r0,PNV_THREAD_NAP
-	cmpwi	cr4,r0,PNV_THREAD_WINKLE
+	lbz	r4,PACA_THREAD_IDLE_STATE(r13)
 	li	r0,PNV_THREAD_RUNNING
 	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
-
+	cmpwi	cr2,r4,PNV_THREAD_NAP
 	bgt	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
 
 
@@ -467,7 +470,12 @@ pnv_restore_hyp_resource_arch207:
  *
  * r13 - PACA
  * cr3 - gt if waking up with partial/complete hypervisor state loss
+ *
+ * If ISA300:
  * cr4 - gt or eq if waking up from complete hypervisor state loss.
+ *
+ * If ISA207:
+ * r4 - PACA_THREAD_IDLE_STATE
  */
 pnv_wakeup_tb_loss:
 	ld	r1,PACAR1(r13)
@@ -482,6 +490,7 @@ pnv_wakeup_tb_loss:
 	 * required to return back to caller after hypervisor state restore is
 	 * complete.
 	 */
+	mr	r18,r4
 	mflr	r17
 	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
@@ -517,10 +526,34 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	 * At this stage
 	 * cr2 - eq if first thread to wakeup in core
 	 * cr3-  gt if waking up with partial/complete hypervisor state loss
+	 * ISA300:
 	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
 	 */
 
 BEGIN_FTR_SECTION
+	/*
+	 * Were we in winkle?
+	 * If yes, check if all threads were in winkle, decrement our
+	 * winkle count, set all thread winkle bits if all were in winkle.
+	 * Check if our thread has a winkle bit set, and set cr4 accordingly
+	 * (to match ISA300, above).
+	 */
+	cmpwi	r18,PNV_THREAD_WINKLE
+	bne	2f
+	andis.	r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
+	subis	r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
+	beq	2f
+	ori	r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
+2:
+	/* Shift thread bit to winkle mask, then test if this thread is set,
+	 * and remove it from the winkle bits */
+	slwi	r8,r7,8
+	and	r8,r8,r15
+	andc	r15,r15,r8
+	cmpwi	cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
+
+	cmpwi	cr4,r18,PNV_THREAD_WINKLE
+
 	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
 	and	r4,r4,r15
 	cmpwi	r4,0	/* Check if first in subcore */
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/8] powerpc/64s: stop using bit in HSPRG0 to test winkle
  2017-03-14  9:23 ` [PATCH 2/8] powerpc/64s: stop using bit in HSPRG0 to test winkle Nicholas Piggin
@ 2017-03-16 11:14   ` Gautham R Shenoy
  0 siblings, 0 replies; 24+ messages in thread
From: Gautham R Shenoy @ 2017-03-16 11:14 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, Gautham R Shenoy, Vaidyanathan Srinivasan

Hi Nick,

On Tue, Mar 14, 2017 at 07:23:43PM +1000, Nicholas Piggin wrote:
> The POWER8 idle code has a neat trick of programming the power on engine
> to restore a low bit into HSPRG0, so idle wakeup code can test and see
> if it has been programmed this way and therefore lost all state, and
> avoiding the expensive full restore if not.
> 
> However this messes with our r13 PACA pointer, and requires HSPRG0 to
> be written to throughout the exception handlers and idle wakeup, rather
> than just once on kernel entry.
> 
> Remove this complexity and assume winkle sleeps always require a state
> restore. This speedup is later re-introduced by counting per-core winkles
> and setting a bitmap of threads with state loss when all are in winkle.
>

Looks good to me.

> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>


--
Thanks and Regards
gautham.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/8] powerpc/64s: use alternative feature patching
  2017-03-14  9:23 ` [PATCH 3/8] powerpc/64s: use alternative feature patching Nicholas Piggin
@ 2017-03-16 11:21   ` Gautham R Shenoy
  0 siblings, 0 replies; 24+ messages in thread
From: Gautham R Shenoy @ 2017-03-16 11:21 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, Gautham R Shenoy, Vaidyanathan Srinivasan

On Tue, Mar 14, 2017 at 07:23:44PM +1000, Nicholas Piggin wrote:
> This reduces the number of nops for POWER8.

Nice!
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

> ---
>  arch/powerpc/kernel/idle_book3s.S | 19 ++++++++++++-------
>  1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 405631b2c229..9284ea0762b1 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -383,7 +383,11 @@ _GLOBAL(power9_idle_stop)
>   */
>  .global pnv_powersave_wakeup
>  pnv_powersave_wakeup:
> -	bl	pnv_restore_hyp_resource
> +BEGIN_FTR_SECTION
> +	bl	pnv_restore_hyp_resource_arch300
> +FTR_SECTION_ELSE
> +	bl	pnv_restore_hyp_resource_arch207
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
> 
>  	li	r0,PNV_THREAD_RUNNING
>  	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
> @@ -411,14 +415,13 @@ pnv_powersave_wakeup:
>   *
>   * cr3 - set to gt if waking up with partial/complete hypervisor state loss
>   */
> -pnv_restore_hyp_resource:
> -	ld	r2,PACATOC(r13);
> -
> -BEGIN_FTR_SECTION
> +pnv_restore_hyp_resource_arch300:
>  	/*
>  	 * POWER ISA 3. Use PSSCR to determine if we
>  	 * are waking up from deep idle state
>  	 */
> +	ld	r2,PACATOC(r13);
> +
>  	LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
>  	ld	r4,ADDROFF(pnv_first_deep_stop_state)(r5)
> 
> @@ -433,12 +436,14 @@ BEGIN_FTR_SECTION
> 
>  	blr	/* Waking up without hypervisor state loss. */
> 
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
> -
> +/* Same calling convention as arch300 */
> +pnv_restore_hyp_resource_arch207:
>  	/*
>  	 * POWER ISA 2.07 or less.
>  	 * Check if we slept with winkle.
>  	 */
> +	ld	r2,PACATOC(r13);
> +
>  	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
>  	cmpwi   cr2,r0,PNV_THREAD_NAP
>  	cmpwi   cr4,r0,PNV_THREAD_WINKLE
> -- 
> 2.11.0
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/8] powerpc/64s: use PACA_THREAD_IDLE_STATE only in POWER8
  2017-03-14  9:23 ` [PATCH 5/8] powerpc/64s: use PACA_THREAD_IDLE_STATE only in POWER8 Nicholas Piggin
@ 2017-03-16 11:54   ` Gautham R Shenoy
  2017-03-16 12:16     ` Nicholas Piggin
  0 siblings, 1 reply; 24+ messages in thread
From: Gautham R Shenoy @ 2017-03-16 11:54 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, Gautham R Shenoy, Vaidyanathan Srinivasan

Hi Nick,

On Tue, Mar 14, 2017 at 07:23:46PM +1000, Nicholas Piggin wrote:
> POWER9 does not use this field, so it should be moved into the POWER8
> code. Update the documentation in the paca struct too.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/include/asm/paca.h   | 12 ++++++++++--
>  arch/powerpc/kernel/idle_book3s.S | 13 +++++++------
>  2 files changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 708c3e592eeb..bbb59e226a9f 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -165,11 +165,19 @@ struct paca_struct {
>  #endif
> 
>  #ifdef CONFIG_PPC_POWERNV
> -	/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
> +	/* CPU idle fields */
> +
> +	/*
> +	 * Per-core word used to synchronize between threads. See
> +	 * asm/cpuidle.h, PNV_CORE_IDLE_*
> +	 */
>  	u32 *core_idle_state_ptr;
> -	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
>  	/* Mask to indicate thread id in core */
>  	u8 thread_mask;
> +
> +	/* POWER8 specific fields */
> +	/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
> +	u8 thread_idle_state;

I am planning to use this in POWER9 DD1 to distinguish between a
SRESET received when the thread was running vs when it was in stop.
Unfortunately the SRR1[46:47] are not cleared in the former case. So
we need a way in software to distinguish between the two.

>  	/* Mask to denote subcore sibling threads */
>  	u8 subcore_sibling_mask;
>  #endif
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 9284ea0762b1..9bdfba75a5e7 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -389,9 +389,6 @@ FTR_SECTION_ELSE
>  	bl	pnv_restore_hyp_resource_arch207
>  ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
> 
> -	li	r0,PNV_THREAD_RUNNING
> -	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
> -
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>  	li	r0,KVM_HWTHREAD_IN_KERNEL
>  	stb	r0,HSTATE_HWTHREAD_STATE(r13)
> @@ -445,9 +442,13 @@ pnv_restore_hyp_resource_arch207:
>  	ld	r2,PACATOC(r13);
> 
>  	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
> -	cmpwi   cr2,r0,PNV_THREAD_NAP
> -	cmpwi   cr4,r0,PNV_THREAD_WINKLE
> -	bgt     cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
> +	cmpwi	cr2,r0,PNV_THREAD_NAP
> +	cmpwi	cr4,r0,PNV_THREAD_WINKLE
> +	li	r0,PNV_THREAD_RUNNING
> +	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
> +
> +	bgt	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
> +
> 
>  	/*
>  	 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
> -- 
> 2.11.0
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 6/8] powerpc/64s: idle expand usable core idle state bits
  2017-03-14  9:23 ` [PATCH 6/8] powerpc/64s: idle expand usable core idle state bits Nicholas Piggin
@ 2017-03-16 12:10   ` Gautham R Shenoy
  0 siblings, 0 replies; 24+ messages in thread
From: Gautham R Shenoy @ 2017-03-16 12:10 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, Gautham R Shenoy, Vaidyanathan Srinivasan

Hi Nick,

On Tue, Mar 14, 2017 at 07:23:47PM +1000, Nicholas Piggin wrote:
> In preparation for adding more bits to the core idle state word,
> move the lock bit up, and unlock by flipping the lock bit rather
> than masking off all but the thread bits.
> 
> Add branch hints for atomic operations while we're here.

Looks good.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/include/asm/cpuidle.h |  4 ++--
>  arch/powerpc/kernel/idle_book3s.S  | 33 +++++++++++++++++----------------
>  2 files changed, 19 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
> index 155731557c9b..b9d9f960dffd 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -7,8 +7,8 @@
>  #define PNV_THREAD_NAP                  1
>  #define PNV_THREAD_SLEEP                2
>  #define PNV_THREAD_WINKLE               3
> -#define PNV_CORE_IDLE_LOCK_BIT          0x100
> -#define PNV_CORE_IDLE_THREAD_BITS       0x0FF
> +#define PNV_CORE_IDLE_LOCK_BIT          0x10000000
> +#define PNV_CORE_IDLE_THREAD_BITS       0x000000FF
> 
>  /*
>   * ============================ NOTE =================================
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 9bdfba75a5e7..1c91dc35c559 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -95,12 +95,12 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
>  core_idle_lock_held:
>  	HMT_LOW
>  3:	lwz	r15,0(r14)
> -	andi.   r15,r15,PNV_CORE_IDLE_LOCK_BIT
> +	andis.	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
>  	bne	3b
>  	HMT_MEDIUM
>  	lwarx	r15,0,r14
> -	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
> -	bne	core_idle_lock_held
> +	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> +	bne-	core_idle_lock_held
>  	blr
> 
>  /*
> @@ -213,8 +213,8 @@ pnv_enter_arch207_idle_mode:
>  lwarx_loop1:
>  	lwarx	r15,0,r14
> 
> -	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
> -	bnel	core_idle_lock_held
> +	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> +	bnel-	core_idle_lock_held
> 
>  	andc	r15,r15,r7			/* Clear thread bit */
> 
> @@ -241,7 +241,7 @@ common_enter: /* common code for all the threads entering sleep or winkle */
>  	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
> 
>  fastsleep_workaround_at_entry:
> -	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
> +	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
>  	stwcx.	r15,0,r14
>  	bne-	lwarx_loop1
>  	isync
> @@ -251,10 +251,10 @@ fastsleep_workaround_at_entry:
>  	li	r4,1
>  	bl	opal_config_cpu_idle_state
> 
> -	/* Clear Lock bit */
> -	li	r0,0
> +	/* Unlock */
> +	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
>  	lwsync
> -	stw	r0,0(r14)
> +	stw	r15,0(r14)
>  	b	common_enter
> 
>  enter_winkle:
> @@ -302,8 +302,8 @@ power_enter_stop:
> 
>  lwarx_loop_stop:
>  	lwarx   r15,0,r14
> -	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
> -	bnel    core_idle_lock_held
> +	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> +	bnel-	core_idle_lock_held
>  	andc    r15,r15,r7                      /* Clear thread bit */
> 
>  	stwcx.  r15,0,r14
> @@ -492,7 +492,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>  	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
>  lwarx_loop2:
>  	lwarx	r15,0,r14
> -	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
> +	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
>  	/*
>  	 * Lock bit is set in one of the 2 cases-
>  	 * a. In the sleep/winkle enter path, the last thread is executing
> @@ -501,9 +501,10 @@ lwarx_loop2:
>  	 * workaround undo code or resyncing timebase or restoring context
>  	 * In either case loop until the lock bit is cleared.
>  	 */
> -	bnel	core_idle_lock_held
> +	bnel-	core_idle_lock_held
> 
> -	cmpwi	cr2,r15,0
> +	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
> +	cmpwi	cr2,r9,0
> 
>  	/*
>  	 * At this stage
> @@ -512,7 +513,7 @@ lwarx_loop2:
>  	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
>  	 */
> 
> -	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
> +	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
>  	stwcx.	r15,0,r14
>  	bne-	lwarx_loop2
>  	isync
> @@ -602,7 +603,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
>  	mtspr	SPRN_WORC,r4
> 
>  clear_lock:
> -	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
> +	xoris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
>  	lwsync
>  	stw	r15,0(r14)
> 
> -- 
> 2.11.0
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 5/8] powerpc/64s: use PACA_THREAD_IDLE_STATE only in POWER8
  2017-03-16 11:54   ` Gautham R Shenoy
@ 2017-03-16 12:16     ` Nicholas Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-16 12:16 UTC (permalink / raw)
  To: Gautham R Shenoy; +Cc: linuxppc-dev, Vaidyanathan Srinivasan

On Thu, 16 Mar 2017 17:24:03 +0530
Gautham R Shenoy <ego@linux.vnet.ibm.com> wrote:

> Hi Nick,
> 
> On Tue, Mar 14, 2017 at 07:23:46PM +1000, Nicholas Piggin wrote:
> > POWER9 does not use this field, so it should be moved into the POWER8
> > code. Update the documentation in the paca struct too.
> > 
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/include/asm/paca.h   | 12 ++++++++++--
> >  arch/powerpc/kernel/idle_book3s.S | 13 +++++++------
> >  2 files changed, 17 insertions(+), 8 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> > index 708c3e592eeb..bbb59e226a9f 100644
> > --- a/arch/powerpc/include/asm/paca.h
> > +++ b/arch/powerpc/include/asm/paca.h
> > @@ -165,11 +165,19 @@ struct paca_struct {
> >  #endif
> > 
> >  #ifdef CONFIG_PPC_POWERNV
> > -	/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
> > +	/* CPU idle fields */
> > +
> > +	/*
> > +	 * Per-core word used to synchronize between threads. See
> > +	 * asm/cpuidle.h, PNV_CORE_IDLE_*
> > +	 */
> >  	u32 *core_idle_state_ptr;
> > -	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
> >  	/* Mask to indicate thread id in core */
> >  	u8 thread_mask;
> > +
> > +	/* POWER8 specific fields */
> > +	/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
> > +	u8 thread_idle_state;  
> 
> I am planning to use this in POWER9 DD1 to distinguish between a
> SRESET received when the thread was running vs when it was in stop.
> Unfortunately the SRR1[46:47] are not cleared in the former case. So
> we need a way in software to distinguish between the two.

Okay, we can skip this for now. It was not a critical part of my
patches, just a tidy up.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-03-14  9:23 ` [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state Nicholas Piggin
@ 2017-03-16 12:40   ` Mahesh Jagannath Salgaonkar
  2017-03-16 13:05     ` Nicholas Piggin
  2017-03-17  2:49     ` Nicholas Piggin
  0 siblings, 2 replies; 24+ messages in thread
From: Mahesh Jagannath Salgaonkar @ 2017-03-16 12:40 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev; +Cc: Gautham R Shenoy

On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
> The ISA specifies power save wakeup can cause a machine check interrupt.
> The machine check handler currently has code to handle that for POWER8,
> but POWER9 crashes when trying to execute the P8 style sleep
> instructions.
> 
> So queue up the machine check, then call into the idle code to wake up
> as the system reset interrupt does, rather than attempting to sleep
> again without going through the main idle path.
> 
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/include/asm/reg.h       |  1 +
>  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
>  2 files changed, 35 insertions(+), 35 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index fc879fd6bdae..8bbdfacce970 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -656,6 +656,7 @@
>  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
>  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
>  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
> +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
>  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
>  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
>  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index e390fcd04bcb..5779d2d6a192 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
>  	/* restore original r1. */			\
>  	ld	r1,GPR1(r1)
> 
> +#ifdef CONFIG_PPC_P7_NAP
> +EXC_COMMON_BEGIN(machine_check_idle_common)
> +	bl	machine_check_queue_event
> +	/*
> +	 * Queue the machine check, then reload SRR1 and use it to set
> +	 * CR3 according to pnv_powersave_wakeup convention.
> +	 */
> +	ld	r12,_MSR(r1)
> +	rlwinm	r11,r12,47-31,30,31
> +	cmpwi	cr3,r11,2
> +
> +	/*
> +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
> +	 * system reset wakeup code.
> +	 */
> +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
> +	mtspr	SPRN_SRR1,r12
> +	std	r12,_MSR(r1)
> +
> +	/*
> +	 * Decrement MCE nesting after finishing with the stack.
> +	 */
> +	lhz	r11,PACA_IN_MCE(r13)
> +	subi	r11,r11,1
> +	sth	r11,PACA_IN_MCE(r13)

Looks like we are not winding up.. Shouldn't we ? What if we may end up
in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
anything ?

> +	b	pnv_powersave_wakeup
> +#endif
>  	/*

[...]

Rest looks good to me.

Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>

Thanks,
-Mahesh.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 7/8] powerpc/64s: idle do not hold reservation longer than required
  2017-03-14  9:23 ` [PATCH 7/8] powerpc/64s: idle do not hold reservation longer than required Nicholas Piggin
@ 2017-03-16 12:43   ` Gautham R Shenoy
  2017-03-16 12:55     ` Nicholas Piggin
  0 siblings, 1 reply; 24+ messages in thread
From: Gautham R Shenoy @ 2017-03-16 12:43 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, Gautham R Shenoy, Vaidyanathan Srinivasan

Hi Nick,

On Tue, Mar 14, 2017 at 07:23:48PM +1000, Nicholas Piggin wrote:
> When taking the core idle state lock, grab it immediately like a
> regular lock, rather than adding more tests in there. Holding the lock
> keeps it stable, so there is no need to do it while holding the
> reservation.

I agree with this patch. Just a minor query

> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kernel/idle_book3s.S | 20 +++++++++++---------
>  1 file changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 1c91dc35c559..3cb75907c5c5 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -488,12 +488,12 @@ BEGIN_FTR_SECTION
>  	CHECK_HMI_INTERRUPT
>  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> 
> -	lbz	r7,PACA_THREAD_MASK(r13)
>  	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
> -lwarx_loop2:
> -	lwarx	r15,0,r14
> -	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> +	lbz	r7,PACA_THREAD_MASK(r13)

Is reversing the order of loads into r7 and r14 intentional?

Other than that,
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

> +
>  	/*
> +	 * Take the core lock to synchronize against other threads.
> +	 *
>  	 * Lock bit is set in one of the 2 cases-
>  	 * a. In the sleep/winkle enter path, the last thread is executing
>  	 * fastsleep workaround code.
> @@ -501,7 +501,14 @@ lwarx_loop2:
>  	 * workaround undo code or resyncing timebase or restoring context
>  	 * In either case loop until the lock bit is cleared.
>  	 */
> +1:
> +	lwarx	r15,0,r14
> +	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
>  	bnel-	core_idle_lock_held
> +	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
> +	stwcx.	r15,0,r14
> +	bne-	1b
> +	isync
> 
>  	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
>  	cmpwi	cr2,r9,0
> @@ -513,11 +520,6 @@ lwarx_loop2:
>  	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
>  	 */
> 
> -	oris	r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -	stwcx.	r15,0,r14
> -	bne-	lwarx_loop2
> -	isync
> -
>  BEGIN_FTR_SECTION
>  	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
>  	and	r4,r4,r15
> -- 
> 2.11.0
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 7/8] powerpc/64s: idle do not hold reservation longer than required
  2017-03-16 12:43   ` Gautham R Shenoy
@ 2017-03-16 12:55     ` Nicholas Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-16 12:55 UTC (permalink / raw)
  To: Gautham R Shenoy; +Cc: linuxppc-dev, Vaidyanathan Srinivasan

On Thu, 16 Mar 2017 18:13:28 +0530
Gautham R Shenoy <ego@linux.vnet.ibm.com> wrote:

> Hi Nick,
> 
> On Tue, Mar 14, 2017 at 07:23:48PM +1000, Nicholas Piggin wrote:
> > When taking the core idle state lock, grab it immediately like a
> > regular lock, rather than adding more tests in there. Holding the lock
> > keeps it stable, so there is no need to do it while holding the
> > reservation.  
> 
> I agree with this patch. Just a minor query
> 
> > 
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/kernel/idle_book3s.S | 20 +++++++++++---------
> >  1 file changed, 11 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> > index 1c91dc35c559..3cb75907c5c5 100644
> > --- a/arch/powerpc/kernel/idle_book3s.S
> > +++ b/arch/powerpc/kernel/idle_book3s.S
> > @@ -488,12 +488,12 @@ BEGIN_FTR_SECTION
> >  	CHECK_HMI_INTERRUPT
> >  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> > 
> > -	lbz	r7,PACA_THREAD_MASK(r13)
> >  	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
> > -lwarx_loop2:
> > -	lwarx	r15,0,r14
> > -	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> > +	lbz	r7,PACA_THREAD_MASK(r13)  
> 
> Is reversing the order of loads into r7 and r14 intentional?

Oh, yes I guess it is because we use r14 result first. I should have
mentioned it but I forgot about it. Probably they decode together,
but you might get them in different cycles.

Thanks for the review!

Thanks,
Nick

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-03-16 12:40   ` Mahesh Jagannath Salgaonkar
@ 2017-03-16 13:05     ` Nicholas Piggin
  2017-03-16 13:19       ` Gautham R Shenoy
  2017-03-17  2:49     ` Nicholas Piggin
  1 sibling, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-16 13:05 UTC (permalink / raw)
  To: Mahesh Jagannath Salgaonkar; +Cc: linuxppc-dev, Gautham R Shenoy

On Thu, 16 Mar 2017 18:10:48 +0530
Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
> > The ISA specifies power save wakeup can cause a machine check interrupt.
> > The machine check handler currently has code to handle that for POWER8,
> > but POWER9 crashes when trying to execute the P8 style sleep
> > instructions.
> > 
> > So queue up the machine check, then call into the idle code to wake up
> > as the system reset interrupt does, rather than attempting to sleep
> > again without going through the main idle path.
> > 
> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/include/asm/reg.h       |  1 +
> >  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
> >  2 files changed, 35 insertions(+), 35 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> > index fc879fd6bdae..8bbdfacce970 100644
> > --- a/arch/powerpc/include/asm/reg.h
> > +++ b/arch/powerpc/include/asm/reg.h
> > @@ -656,6 +656,7 @@
> >  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
> >  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
> >  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
> > +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
> >  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
> >  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
> >  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
> > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > index e390fcd04bcb..5779d2d6a192 100644
> > --- a/arch/powerpc/kernel/exceptions-64s.S
> > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
> >  	/* restore original r1. */			\
> >  	ld	r1,GPR1(r1)
> > 
> > +#ifdef CONFIG_PPC_P7_NAP
> > +EXC_COMMON_BEGIN(machine_check_idle_common)
> > +	bl	machine_check_queue_event
> > +	/*
> > +	 * Queue the machine check, then reload SRR1 and use it to set
> > +	 * CR3 according to pnv_powersave_wakeup convention.
> > +	 */
> > +	ld	r12,_MSR(r1)
> > +	rlwinm	r11,r12,47-31,30,31
> > +	cmpwi	cr3,r11,2
> > +
> > +	/*
> > +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
> > +	 * system reset wakeup code.
> > +	 */
> > +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
> > +	mtspr	SPRN_SRR1,r12
> > +	std	r12,_MSR(r1)
> > +
> > +	/*
> > +	 * Decrement MCE nesting after finishing with the stack.
> > +	 */
> > +	lhz	r11,PACA_IN_MCE(r13)
> > +	subi	r11,r11,1
> > +	sth	r11,PACA_IN_MCE(r13)  
> 
> Looks like we are not winding up.. Shouldn't we ? What if we may end up
> in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
> anything ?

Hmm, no I think you're right. Thanks, good catch. But can we do it with
just setting PACA_NAPSTATELOST?

> 
> > +	b	pnv_powersave_wakeup
> > +#endif
> >  	/*  
> 
> [...]
> 
> Rest looks good to me.
> 
> Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>

Thanks,
Nick

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-03-16 13:05     ` Nicholas Piggin
@ 2017-03-16 13:19       ` Gautham R Shenoy
  2017-03-20  5:22         ` Mahesh Jagannath Salgaonkar
  0 siblings, 1 reply; 24+ messages in thread
From: Gautham R Shenoy @ 2017-03-16 13:19 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Mahesh Jagannath Salgaonkar, linuxppc-dev, Gautham R Shenoy

Hi,

On Thu, Mar 16, 2017 at 11:05:20PM +1000, Nicholas Piggin wrote:
> On Thu, 16 Mar 2017 18:10:48 +0530
> Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
> > On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
> > > The ISA specifies power save wakeup can cause a machine check interrupt.
> > > The machine check handler currently has code to handle that for POWER8,
> > > but POWER9 crashes when trying to execute the P8 style sleep
> > > instructions.
> > > 
> > > So queue up the machine check, then call into the idle code to wake up
> > > as the system reset interrupt does, rather than attempting to sleep
> > > again without going through the main idle path.
> > > 
> > > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > > ---
> > >  arch/powerpc/include/asm/reg.h       |  1 +
> > >  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
> > >  2 files changed, 35 insertions(+), 35 deletions(-)
> > > 
> > > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> > > index fc879fd6bdae..8bbdfacce970 100644
> > > --- a/arch/powerpc/include/asm/reg.h
> > > +++ b/arch/powerpc/include/asm/reg.h
> > > @@ -656,6 +656,7 @@
> > >  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
> > >  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
> > >  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
> > > +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
> > >  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
> > >  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
> > >  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
> > > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > > index e390fcd04bcb..5779d2d6a192 100644
> > > --- a/arch/powerpc/kernel/exceptions-64s.S
> > > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > > @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
> > >  	/* restore original r1. */			\
> > >  	ld	r1,GPR1(r1)
> > > 
> > > +#ifdef CONFIG_PPC_P7_NAP
> > > +EXC_COMMON_BEGIN(machine_check_idle_common)
> > > +	bl	machine_check_queue_event
> > > +	/*
> > > +	 * Queue the machine check, then reload SRR1 and use it to set
> > > +	 * CR3 according to pnv_powersave_wakeup convention.
> > > +	 */
> > > +	ld	r12,_MSR(r1)
> > > +	rlwinm	r11,r12,47-31,30,31
> > > +	cmpwi	cr3,r11,2
> > > +
> > > +	/*
> > > +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
> > > +	 * system reset wakeup code.
> > > +	 */
> > > +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
> > > +	mtspr	SPRN_SRR1,r12
> > > +	std	r12,_MSR(r1)
> > > +
> > > +	/*
> > > +	 * Decrement MCE nesting after finishing with the stack.
> > > +	 */
> > > +	lhz	r11,PACA_IN_MCE(r13)
> > > +	subi	r11,r11,1
> > > +	sth	r11,PACA_IN_MCE(r13)  
> > 
> > Looks like we are not winding up.. Shouldn't we ? What if we may end up
> > in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
> > anything ?

Nice catch! This can occur if SRR1[46:47] == 0b01.

>
> Hmm, no I think you're right. Thanks, good catch. But can we do it with
> just setting PACA_NAPSTATELOST?

Unconditionally setting PACA_NAPSTATELOST should be sufficient.

> 
> > 
> > > +	b	pnv_powersave_wakeup
> > > +#endif
> > >  	/*  
> > 
> > [...]
> > 
> > Rest looks good to me.
> > 
> > Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>
> 
> Thanks,
> Nick
> 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 8/8] powerpc/64s: idle POWER8 avoid full state loss recovery when possible
  2017-03-14  9:23 ` [PATCH 8/8] powerpc/64s: idle POWER8 avoid full state loss recovery when possible Nicholas Piggin
@ 2017-03-16 16:12   ` Gautham R Shenoy
  2017-03-17  5:24     ` Nicholas Piggin
  0 siblings, 1 reply; 24+ messages in thread
From: Gautham R Shenoy @ 2017-03-16 16:12 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, Gautham R Shenoy, Vaidyanathan Srinivasan

Hi Nick,

On Tue, Mar 14, 2017 at 07:23:49PM +1000, Nicholas Piggin wrote:
> If not all threads were in winkle, full state loss recovery is not
> necessary and can be avoided. A previous patch removed this optimisation
> due to some complexity with the implementation. Re-implement it by
> counting the number of threads in winkle with the per-core idle state.
> Only restore full state loss if all threads were in winkle.
> 
> This has a small window of false positives right before threads execute
> winkle and just after they wake up, when the winkle count does not
> reflect the true number of threads in winkle. This is not a significant
> problem in comparison with even the minimum winkle duration. For
> correctness, a false positive is not a problem (only false negatives
> would be).

I like the technique.. Some comments below.

> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/include/asm/cpuidle.h | 32 ++++++++++++++++++++++++---
>  arch/powerpc/kernel/idle_book3s.S  | 45 +++++++++++++++++++++++++++++++++-----
>  2 files changed, 68 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
> index b9d9f960dffd..b68a5cd75ae8 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -2,13 +2,39 @@
>  #define _ASM_POWERPC_CPUIDLE_H
> 
>  #ifdef CONFIG_PPC_POWERNV
> -/* Used in powernv idle state management */
> +/* Thread state used in powernv idle state management */
>  #define PNV_THREAD_RUNNING              0
>  #define PNV_THREAD_NAP                  1
>  #define PNV_THREAD_SLEEP                2
>  #define PNV_THREAD_WINKLE               3
> -#define PNV_CORE_IDLE_LOCK_BIT          0x10000000
> -#define PNV_CORE_IDLE_THREAD_BITS       0x000000FF
> +
> +/*
> + * Core state used in powernv idle for POWER8.
> + *
> + * The lock bit synchronizes updates to the state, as well as parts of the
> + * sleep/wake code (see kernel/idle_book3s.S).
> + *
> + * Bottom 8 bits track the idle state of each thread. Bit is cleared before
> + * the thread executes an idle instruction (nap/sleep/winkle).
> + *
> + * Then there is winkle tracking. A core does not lose complete state
> + * until every thread is in winkle. So the winkle count field counts the
> + * number of threads in winkle (small window of false positives is okay
> + * around the sleep/wake, so long as there are no false negatives).
> + *
> + * When the winkle count reaches 8 (the COUNT_ALL_BIT becomes set), then
> + * the THREAD_WINKLE_BITS are set, which indicate which threads have not
> + * yet woken from the winkle state.
> + */
> +#define PNV_CORE_IDLE_LOCK_BIT			0x10000000
> +
> +#define PNV_CORE_IDLE_WINKLE_COUNT		0x00010000
> +#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT	0x00080000
> +#define PNV_CORE_IDLE_WINKLE_COUNT_BITS		0x000F0000
> +#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT	8
> +#define PNV_CORE_IDLE_THREAD_WINKLE_BITS	0x0000FF00
> +
> +#define PNV_CORE_IDLE_THREAD_BITS       	0x000000FF
> 
>  /*
>   * ============================ NOTE =================================
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 3cb75907c5c5..87518a1dca50 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -210,15 +210,20 @@ pnv_enter_arch207_idle_mode:
>  	/* Sleep or winkle */
>  	lbz	r7,PACA_THREAD_MASK(r13)
>  	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
> +	li	r5,0
> +	beq	cr3,3f
> +	lis	r5,PNV_CORE_IDLE_WINKLE_COUNT@h
> +3:
>  lwarx_loop1:
>  	lwarx	r15,0,r14
> 
>  	andis.	r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
>  	bnel-	core_idle_lock_held
> 
> +	add	r15,r15,r5			/* Add if winkle */
>  	andc	r15,r15,r7			/* Clear thread bit */
> 
> -	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
> +	andi.	r9,r15,PNV_CORE_IDLE_THREAD_BITS
> 
>  /*
>   * If cr0 = 0, then current thread is the last thread of the core entering
> @@ -437,16 +442,14 @@ pnv_restore_hyp_resource_arch300:
>  pnv_restore_hyp_resource_arch207:
>  	/*
>  	 * POWER ISA 2.07 or less.
> -	 * Check if we slept with winkle.
> +	 * Check if we slept with sleep or winkle.
>  	 */
>  	ld	r2,PACATOC(r13);
> 
> -	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
> -	cmpwi	cr2,r0,PNV_THREAD_NAP
> -	cmpwi	cr4,r0,PNV_THREAD_WINKLE
> +	lbz	r4,PACA_THREAD_IDLE_STATE(r13)
>  	li	r0,PNV_THREAD_RUNNING
>  	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
> -
> +	cmpwi	cr2,r4,PNV_THREAD_NAP
>  	bgt	cr2,pnv_wakeup_tb_loss	/* Either sleep or Winkle */
> 
> 
> @@ -467,7 +470,12 @@ pnv_restore_hyp_resource_arch207:
>   *
>   * r13 - PACA
>   * cr3 - gt if waking up with partial/complete hypervisor state loss
> + *
> + * If ISA300:
>   * cr4 - gt or eq if waking up from complete hypervisor state loss.
> + *
> + * If ISA207:
> + * r4 - PACA_THREAD_IDLE_STATE
>   */
>  pnv_wakeup_tb_loss:
>  	ld	r1,PACAR1(r13)
> @@ -482,6 +490,7 @@ pnv_wakeup_tb_loss:
>  	 * required to return back to caller after hypervisor state restore is
>  	 * complete.
>  	 */
> +	mr	r18,r4
>  	mflr	r17
>  	mfspr	r16,SPRN_SRR1
>  BEGIN_FTR_SECTION
> @@ -517,10 +526,34 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
>  	 * At this stage
>  	 * cr2 - eq if first thread to wakeup in core
>  	 * cr3-  gt if waking up with partial/complete hypervisor state loss
> +	 * ISA300:
>  	 * cr4 - gt or eq if waking up from complete hypervisor state loss.

For ISA 300, we need to restore hypervisor thread resources if we are
waking up from a state that is as deep or deeper than
pnv_first_deep_stop_state. In that case, we expect either the "gt" bit
or the "eq" bit of cr4 to be set.

Before this patch, on ISA 207, cr4 would be "eq" if we are waking up
from winkle.


>  	 */
> 
>  BEGIN_FTR_SECTION
> +	/*
> +	 * Were we in winkle?
> +	 * If yes, check if all threads were in winkle, decrement our
> +	 * winkle count, set all thread winkle bits if all were in winkle.
> +	 * Check if our thread has a winkle bit set, and set cr4 accordingly
> +	 * (to match ISA300, above).
> +	 */
> +	cmpwi	r18,PNV_THREAD_WINKLE
> +	bne	2f
> +	andis.	r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
> +	subis	r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
> +	beq	2f
> +	ori	r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */

So PNV_CORE_IDLE_THREAD_WINKLE_BITS will be set by the first waking
thread in a winkle'd core. Subsequent waking thread(s) can only clear
their respective bits from the winkle bits.

> +2:
> +	/* Shift thread bit to winkle mask, then test if this thread is set,
> +	 * and remove it from the winkle bits */
> +	slwi	r8,r7,8
> +	and	r8,r8,r15
> +	andc	r15,r15,r8
> +	cmpwi	cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */

Very clever indeed! So we seem to be doing the following:

winkle_entry(thread_bit)
{
	atomic_inc(core_winkle_count);
}

winkle_exit(thread_bit)
{
	atomic {
	       if (core_winkle_count == 8)
	       	  set all thread bits in core_winkle_mask;

	       if (thread_bit set in core_winkle_mask) {
	       	  cr4 will be "gt".
	       }

	       clear thread_bit from core_winkle_mask;
	}
}

I would suggest documenting this a bit more given the
subtlety.

> +
> +	cmpwi	cr4,r18,PNV_THREAD_WINKLE

> This second comparison seems to be undoing the benefit of the
optimization by setting "eq" in cr4 for every thread that wakes up
from winkle irrespective of whether the core has winkled or not.

However, this is quite subtle, so good chances that my addled brain
has gotten it wrong.

Case 1: If a thread were waking up from winkle. We have 2
        possibilities

      a) The entire core winkled at least once ever since this thread
      went to winkle. In this case we want cr4 to be "eq" or "gt" so
      that we restore the hypervisor resources.
      Now, for this case the first thread in the core that wakes up would find
      count == PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT and hence would have
      set PNV_CORE_IDLE_THREAD_WINKLE_BITS which includes this
      thread's bit as well. Hence, this thread
      would find its bit set in r8, thereby cr4 would be "gt". Which
      is good for us!

      b) The entire core has not entered winkle. In which case we
      haven't lost any hypervisor resources, so no need to restore
      these. We would like cr4 to have neither "eq" nor "gt" set here.
      In this case, no thread that has woken up prior to this thread
      would find count == PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT. Hence
      none of those earlier threads would set
      PNV_CORE_IDLE_THREAD_WINKLE_BITS. Thus, for this thread, r8
      would be 0, and hence cr4 would be "lt". Which is what we want.
      

Case 2: If the thread is waking up from fast-sleep. In which case we
      haven't lost any hypervisor resources, so no need to restore
      these. We would like cr4 to have neither "eq" nor "gt" set here.

      While entering fastsleep, this thread wouldn't have contributed
      to the winkle count. Thus any thread that would have woken up
      before this thread would not find
      winkle count == PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT
      and hence wouldn't have set the bits in
      PNV_CORE_IDLE_THREAD_WINKLE_BITS. Thus, r8 would
      be 0, and hence cr4 would be "lt".


So it seems to be the case that the first comparison suffices. What am
I missing here ?

> +
>  	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
>  	and	r4,r4,r15
>  	cmpwi	r4,0	/* Check if first in subcore */
> -- 
> 2.11.0
> 

--
Thanks and Regards
gautham.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-03-16 12:40   ` Mahesh Jagannath Salgaonkar
  2017-03-16 13:05     ` Nicholas Piggin
@ 2017-03-17  2:49     ` Nicholas Piggin
  2017-03-17  5:15       ` Nicholas Piggin
  1 sibling, 1 reply; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-17  2:49 UTC (permalink / raw)
  To: Mahesh Jagannath Salgaonkar; +Cc: linuxppc-dev, Gautham R Shenoy

On Thu, 16 Mar 2017 18:10:48 +0530
Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
> > The ISA specifies power save wakeup can cause a machine check interrupt.
> > The machine check handler currently has code to handle that for POWER8,
> > but POWER9 crashes when trying to execute the P8 style sleep
> > instructions.
> > 
> > So queue up the machine check, then call into the idle code to wake up
> > as the system reset interrupt does, rather than attempting to sleep
> > again without going through the main idle path.
> > 
> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/include/asm/reg.h       |  1 +
> >  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
> >  2 files changed, 35 insertions(+), 35 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> > index fc879fd6bdae..8bbdfacce970 100644
> > --- a/arch/powerpc/include/asm/reg.h
> > +++ b/arch/powerpc/include/asm/reg.h
> > @@ -656,6 +656,7 @@
> >  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
> >  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
> >  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
> > +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
> >  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
> >  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
> >  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
> > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > index e390fcd04bcb..5779d2d6a192 100644
> > --- a/arch/powerpc/kernel/exceptions-64s.S
> > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
> >  	/* restore original r1. */			\
> >  	ld	r1,GPR1(r1)
> > 
> > +#ifdef CONFIG_PPC_P7_NAP
> > +EXC_COMMON_BEGIN(machine_check_idle_common)
> > +	bl	machine_check_queue_event
> > +	/*
> > +	 * Queue the machine check, then reload SRR1 and use it to set
> > +	 * CR3 according to pnv_powersave_wakeup convention.
> > +	 */
> > +	ld	r12,_MSR(r1)
> > +	rlwinm	r11,r12,47-31,30,31
> > +	cmpwi	cr3,r11,2
> > +
> > +	/*
> > +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
> > +	 * system reset wakeup code.
> > +	 */
> > +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
> > +	mtspr	SPRN_SRR1,r12
> > +	std	r12,_MSR(r1)
> > +
> > +	/*
> > +	 * Decrement MCE nesting after finishing with the stack.
> > +	 */
> > +	lhz	r11,PACA_IN_MCE(r13)
> > +	subi	r11,r11,1
> > +	sth	r11,PACA_IN_MCE(r13)  
> 
> Looks like we are not winding up.. Shouldn't we ? What if we may end up
> in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
> anything ?

Hmm, on second look, I don't think any non-volatile GPRs are overwritten
in this path. But this MCE is a slow path, and it is a much longer path
than the system reset idle wakeup... So I'll add the napstatelost with
a comment.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-03-17  2:49     ` Nicholas Piggin
@ 2017-03-17  5:15       ` Nicholas Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-17  5:15 UTC (permalink / raw)
  To: Mahesh Jagannath Salgaonkar; +Cc: linuxppc-dev, Gautham R Shenoy

On Fri, 17 Mar 2017 12:49:27 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> On Thu, 16 Mar 2017 18:10:48 +0530
> Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
> > On 03/14/2017 02:53 PM, Nicholas Piggin wrote:  

> > Looks like we are not winding up.. Shouldn't we ? What if we may end up
> > in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
> > anything ?  
> 
> Hmm, on second look, I don't think any non-volatile GPRs are overwritten
> in this path. But this MCE is a slow path, and it is a much longer path
> than the system reset idle wakeup... So I'll add the napstatelost with
> a comment.

On third look, I'll just add the comment. The windup does not restore
non-volatile GPRs either, and in general we're careful not to use them
in exception handlers. So I think it's okay.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 8/8] powerpc/64s: idle POWER8 avoid full state loss recovery when possible
  2017-03-16 16:12   ` Gautham R Shenoy
@ 2017-03-17  5:24     ` Nicholas Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2017-03-17  5:24 UTC (permalink / raw)
  To: Gautham R Shenoy; +Cc: linuxppc-dev, Vaidyanathan Srinivasan

On Thu, 16 Mar 2017 21:42:01 +0530
Gautham R Shenoy <ego@linux.vnet.ibm.com> wrote:

Hey, thanks for the review.

> Hi Nick,
> 
> On Tue, Mar 14, 2017 at 07:23:49PM +1000, Nicholas Piggin wrote:
> > If not all threads were in winkle, full state loss recovery is not
> > necessary and can be avoided. A previous patch removed this optimisation
> > due to some complexity with the implementation. Re-implement it by
> > counting the number of threads in winkle with the per-core idle state.
> > Only restore full state loss if all threads were in winkle.
> > 
> > This has a small window of false positives right before threads execute
> > winkle and just after they wake up, when the winkle count does not
> > reflect the true number of threads in winkle. This is not a significant
> > problem in comparison with even the minimum winkle duration. For
> > correctness, a false positive is not a problem (only false negatives
> > would be).  


> > @@ -517,10 +526,34 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> >  	 * At this stage
> >  	 * cr2 - eq if first thread to wakeup in core
> >  	 * cr3-  gt if waking up with partial/complete hypervisor state loss
> > +	 * ISA300:
> >  	 * cr4 - gt or eq if waking up from complete hypervisor state loss.  
> 
> For ISA 300, we need to restore hypervisor thread resources if we are
> waking up from a state that is as deep or deeper than
> pnv_first_deep_stop_state. In that case, we expect either the "gt" bit
> or the "eq" bit of cr4 to be set.
> 
> Before this patch, on ISA 207, cr4 would be "eq" if we are waking up
> from winkle.

Yes, that was based on testing the thread idle state (nap/sleep/winkle).
The condition has become more complex now, so it moved below.


> >  	 */
> > 
> >  BEGIN_FTR_SECTION
> > +	/*
> > +	 * Were we in winkle?
> > +	 * If yes, check if all threads were in winkle, decrement our
> > +	 * winkle count, set all thread winkle bits if all were in winkle.
> > +	 * Check if our thread has a winkle bit set, and set cr4 accordingly
> > +	 * (to match ISA300, above).
> > +	 */
> > +	cmpwi	r18,PNV_THREAD_WINKLE
> > +	bne	2f
> > +	andis.	r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
> > +	subis	r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
> > +	beq	2f
> > +	ori	r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */  
> 
> So PNV_CORE_IDLE_THREAD_WINKLE_BITS will be set by the first waking
> thread in a winkle'd core. Subsequent waking thread(s) can only clear
> their respective bits from the winkle bits.

Yes. It was easier to do it on the wakeup side because the sleep side
does not take the lock, only uses atomic load/store.


> > +2:
> > +	/* Shift thread bit to winkle mask, then test if this thread is set,
> > +	 * and remove it from the winkle bits */
> > +	slwi	r8,r7,8
> > +	and	r8,r8,r15
> > +	andc	r15,r15,r8
> > +	cmpwi	cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */  
> 
> Very clever indeed! So we seem to be doing the following:
> 
> winkle_entry(thread_bit)
> {
> 	atomic_inc(core_winkle_count);
> }
> 
> winkle_exit(thread_bit)
> {
> 	atomic {
> 	       if (core_winkle_count == 8)
> 	       	  set all thread bits in core_winkle_mask;
> 
> 	       if (thread_bit set in core_winkle_mask) {
> 	       	  cr4 will be "gt".
> 	       }
> 
> 	       clear thread_bit from core_winkle_mask;
> 	}
> }
> 
> I would suggest documenting this a bit more given the
> subtlety.

Yes, commenting the algorithm in pseudo C is probably a good idea. It is
a bit tricky to follow.

> 
> > +
> > +	cmpwi	cr4,r18,PNV_THREAD_WINKLE  
> 
> > This second comparison seems to be undoing the benefit of the
> optimization by setting "eq" in cr4 for every thread that wakes up
> from winkle irrespective of whether the core has winkled or not.

No, good catch. That was some leftover debugging code that got in there
I think. Hmm, I'm not sure how much of my testing that's invalidated, so
I'll have to fix it up and re-run some more tests.

> 
> However, this is quite subtle, so good chances that my addled brain
> has gotten it wrong.
> 
> Case 1: If a thread were waking up from winkle. We have 2
>         possibilities
> 
>       a) The entire core winkled at least once ever since this thread
>       went to winkle. In this case we want cr4 to be "eq" or "gt" so
>       that we restore the hypervisor resources.
>       Now, for this case the first thread in the core that wakes up would find
>       count == PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT and hence would have
>       set PNV_CORE_IDLE_THREAD_WINKLE_BITS which includes this
>       thread's bit as well. Hence, this thread
>       would find its bit set in r8, thereby cr4 would be "gt". Which
>       is good for us!
> 
>       b) The entire core has not entered winkle. In which case we
>       haven't lost any hypervisor resources, so no need to restore
>       these. We would like cr4 to have neither "eq" nor "gt" set here.
>       In this case, no thread that has woken up prior to this thread
>       would find count == PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT. Hence
>       none of those earlier threads would set
>       PNV_CORE_IDLE_THREAD_WINKLE_BITS. Thus, for this thread, r8
>       would be 0, and hence cr4 would be "lt". Which is what we want.
>       
> 
> Case 2: If the thread is waking up from fast-sleep. In which case we
>       haven't lost any hypervisor resources, so no need to restore
>       these. We would like cr4 to have neither "eq" nor "gt" set here.
> 
>       While entering fastsleep, this thread wouldn't have contributed
>       to the winkle count. Thus any thread that would have woken up
>       before this thread would not find
>       winkle count == PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT
>       and hence wouldn't have set the bits in
>       PNV_CORE_IDLE_THREAD_WINKLE_BITS. Thus, r8 would
>       be 0, and hence cr4 would be "lt".
> 
> 
> So it seems to be the case that the first comparison suffices. What am
> I missing here ?

No, nothing — you're quite right. Very thorough, thank you.

I'll respin and repost with the feedback.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state
  2017-03-16 13:19       ` Gautham R Shenoy
@ 2017-03-20  5:22         ` Mahesh Jagannath Salgaonkar
  0 siblings, 0 replies; 24+ messages in thread
From: Mahesh Jagannath Salgaonkar @ 2017-03-20  5:22 UTC (permalink / raw)
  To: ego, Nicholas Piggin; +Cc: linuxppc-dev

On 03/16/2017 06:49 PM, Gautham R Shenoy wrote:
> Hi,
> 
> On Thu, Mar 16, 2017 at 11:05:20PM +1000, Nicholas Piggin wrote:
>> On Thu, 16 Mar 2017 18:10:48 +0530
>> Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
>>
>>> On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
>>>> The ISA specifies power save wakeup can cause a machine check interrupt.
>>>> The machine check handler currently has code to handle that for POWER8,
>>>> but POWER9 crashes when trying to execute the P8 style sleep
>>>> instructions.
>>>>
>>>> So queue up the machine check, then call into the idle code to wake up
>>>> as the system reset interrupt does, rather than attempting to sleep
>>>> again without going through the main idle path.
>>>>
>>>> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
>>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>>> ---
>>>>  arch/powerpc/include/asm/reg.h       |  1 +
>>>>  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
>>>>  2 files changed, 35 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
>>>> index fc879fd6bdae..8bbdfacce970 100644
>>>> --- a/arch/powerpc/include/asm/reg.h
>>>> +++ b/arch/powerpc/include/asm/reg.h
>>>> @@ -656,6 +656,7 @@
>>>>  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
>>>>  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
>>>>  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
>>>> +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
>>>>  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
>>>>  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
>>>>  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
>>>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>>>> index e390fcd04bcb..5779d2d6a192 100644
>>>> --- a/arch/powerpc/kernel/exceptions-64s.S
>>>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>>>> @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
>>>>  	/* restore original r1. */			\
>>>>  	ld	r1,GPR1(r1)
>>>>
>>>> +#ifdef CONFIG_PPC_P7_NAP
>>>> +EXC_COMMON_BEGIN(machine_check_idle_common)
>>>> +	bl	machine_check_queue_event
>>>> +	/*
>>>> +	 * Queue the machine check, then reload SRR1 and use it to set
>>>> +	 * CR3 according to pnv_powersave_wakeup convention.
>>>> +	 */
>>>> +	ld	r12,_MSR(r1)
>>>> +	rlwinm	r11,r12,47-31,30,31
>>>> +	cmpwi	cr3,r11,2
>>>> +
>>>> +	/*
>>>> +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
>>>> +	 * system reset wakeup code.
>>>> +	 */
>>>> +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
>>>> +	mtspr	SPRN_SRR1,r12
>>>> +	std	r12,_MSR(r1)
>>>> +
>>>> +	/*
>>>> +	 * Decrement MCE nesting after finishing with the stack.
>>>> +	 */
>>>> +	lhz	r11,PACA_IN_MCE(r13)
>>>> +	subi	r11,r11,1
>>>> +	sth	r11,PACA_IN_MCE(r13)  
>>>
>>> Looks like we are not winding up.. Shouldn't we ? What if we may end up
>>> in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
>>> anything ?
> 
> Nice catch! This can occur if SRR1[46:47] == 0b01.
> 
>>
>> Hmm, no I think you're right. Thanks, good catch. But can we do it with
>> just setting PACA_NAPSTATELOST?
> 
> Unconditionally setting PACA_NAPSTATELOST should be sufficient.

Agree, that should take care.

> 
>>
>>>
>>>> +	b	pnv_powersave_wakeup
>>>> +#endif
>>>>  	/*  
>>>
>>> [...]
>>>
>>> Rest looks good to me.
>>>
>>> Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>
>>
>> Thanks,
>> Nick
>>

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2017-03-20  5:23 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-14  9:23 [PATCH 0/8] idle fixes and changes for POWER8 and POWER9 Nicholas Piggin
2017-03-14  9:23 ` [PATCH 1/8] powerpc/64s: move remaining system reset idle code into idle_book3s.S Nicholas Piggin
2017-03-14  9:23 ` [PATCH 2/8] powerpc/64s: stop using bit in HSPRG0 to test winkle Nicholas Piggin
2017-03-16 11:14   ` Gautham R Shenoy
2017-03-14  9:23 ` [PATCH 3/8] powerpc/64s: use alternative feature patching Nicholas Piggin
2017-03-16 11:21   ` Gautham R Shenoy
2017-03-14  9:23 ` [PATCH 4/8] powerpc/64s: fix POWER9 machine check handler from stop state Nicholas Piggin
2017-03-16 12:40   ` Mahesh Jagannath Salgaonkar
2017-03-16 13:05     ` Nicholas Piggin
2017-03-16 13:19       ` Gautham R Shenoy
2017-03-20  5:22         ` Mahesh Jagannath Salgaonkar
2017-03-17  2:49     ` Nicholas Piggin
2017-03-17  5:15       ` Nicholas Piggin
2017-03-14  9:23 ` [PATCH 5/8] powerpc/64s: use PACA_THREAD_IDLE_STATE only in POWER8 Nicholas Piggin
2017-03-16 11:54   ` Gautham R Shenoy
2017-03-16 12:16     ` Nicholas Piggin
2017-03-14  9:23 ` [PATCH 6/8] powerpc/64s: idle expand usable core idle state bits Nicholas Piggin
2017-03-16 12:10   ` Gautham R Shenoy
2017-03-14  9:23 ` [PATCH 7/8] powerpc/64s: idle do not hold reservation longer than required Nicholas Piggin
2017-03-16 12:43   ` Gautham R Shenoy
2017-03-16 12:55     ` Nicholas Piggin
2017-03-14  9:23 ` [PATCH 8/8] powerpc/64s: idle POWER8 avoid full state loss recovery when possible Nicholas Piggin
2017-03-16 16:12   ` Gautham R Shenoy
2017-03-17  5:24     ` Nicholas Piggin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.