All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/2] powerpc/timer - large decrementer support
@ 2016-05-10  4:57 Oliver O'Halloran
  2016-05-10  4:57 ` [PATCH v3 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Oliver O'Halloran @ 2016-05-10  4:57 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Oliver O'Halloran, Michael Neuling, Balbir Singh, Jack Miller

POWER ISA v3 adds large decrementer (LD) mode of operation which increases
the size of the decrementer register from 32 bits to an implementation
defined with of up to 64 bits.

This patch adds support for the LD on processors with the CPU_FTR_ARCH_300
cpu feature flag set. For CPUs with this feature LD mode is enabled when
when the ibm,dec-bits devicetree property is supplied for the boot CPU. The
decrementer value is a signed quantity (with negative values indicating a
pending exception) and this property is required to find the maximum
positive decrementer value. If this property is not supplied then the
traditional decrementer width of 32 bits is assumed and LD mode is disabled.

This patch was based on initial work by Jack Miller.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Jack Miller <jack@codezen.org>
---
 arch/powerpc/include/asm/reg.h  |  1 +
 arch/powerpc/include/asm/time.h |  6 +--
 arch/powerpc/kernel/time.c      | 92 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index f5f4c66bbbc9..ff581ed1ab9d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -332,6 +332,7 @@
 #define   LPCR_AIL_0	0x00000000	/* MMU off exception offset 0x0 */
 #define   LPCR_AIL_3	0x01800000	/* MMU on exception offset 0xc00...4xxx */
 #define   LPCR_ONL	0x00040000	/* online - PURR/SPURR count */
+#define   LPCR_LD	0x00020000	/* large decremeter */
 #define   LPCR_PECE	0x0001f000	/* powersave exit cause enable */
 #define     LPCR_PECEDP	0x00010000	/* directed priv dbells cause exit */
 #define     LPCR_PECEDH	0x00008000	/* directed hyp dbells cause exit */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 1092fdd7e737..09211640a0e0 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -146,7 +146,7 @@ static inline void set_tb(unsigned int upper, unsigned int lower)
  * in auto-reload mode.  The problem is PIT stops counting when it
  * hits zero.  If it would wrap, we could use it just like a decrementer.
  */
-static inline unsigned int get_dec(void)
+static inline u64 get_dec(void)
 {
 #if defined(CONFIG_40x)
 	return (mfspr(SPRN_PIT));
@@ -160,10 +160,10 @@ static inline unsigned int get_dec(void)
  * in when the decrementer generates its interrupt: on the 1 to 0
  * transition for Book E/4xx, but on the 0 to -1 transition for others.
  */
-static inline void set_dec(int val)
+static inline void set_dec(u64 val)
 {
 #if defined(CONFIG_40x)
-	mtspr(SPRN_PIT, val);
+	mtspr(SPRN_PIT, (u32) val);
 #else
 #ifndef CONFIG_BOOKE
 	--val;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 81b0900a39ee..0656e80cadbf 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -95,7 +95,8 @@ static struct clocksource clocksource_timebase = {
 	.read         = timebase_read,
 };
 
-#define DECREMENTER_MAX	0x7fffffff
+#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
+u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
 
 static int decrementer_set_next_event(unsigned long evt,
 				      struct clock_event_device *dev);
@@ -503,7 +504,7 @@ static void __timer_interrupt(void)
 		__this_cpu_inc(irq_stat.timer_irqs_event);
 	} else {
 		now = *next_tb - now;
-		if (now <= DECREMENTER_MAX)
+		if (now <= decrementer_max)
 			set_dec((int)now);
 		/* We may have raced with new irq work */
 		if (test_irq_work_pending())
@@ -534,7 +535,7 @@ void timer_interrupt(struct pt_regs * regs)
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions.
 	 */
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 
 	/* Some implementations of hotplug will get timer interrupts while
 	 * offline, just ignore these and we also need to set
@@ -582,9 +583,9 @@ static void generic_suspend_disable_irqs(void)
 	 * with suspending.
 	 */
 
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 	local_irq_disable();
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 }
 
 static void generic_suspend_enable_irqs(void)
@@ -865,7 +866,7 @@ static int decrementer_set_next_event(unsigned long evt,
 
 static int decrementer_shutdown(struct clock_event_device *dev)
 {
-	decrementer_set_next_event(DECREMENTER_MAX, dev);
+	decrementer_set_next_event(decrementer_max, dev);
 	return 0;
 }
 
@@ -891,6 +892,76 @@ static void register_decrementer_clockevent(int cpu)
 	clockevents_register_device(dec);
 }
 
+static inline bool cpu_has_large_dec(void)
+{
+	return cpu_has_feature(CPU_FTR_ARCH_300);
+}
+
+static inline bool large_dec_enabled(void)
+{
+	return (mfspr(SPRN_LPCR) & LPCR_LD) == LPCR_LD;
+}
+
+/* enables the large decrementer for the current CPU */
+static void enable_large_decrementer(void)
+{
+	/* do we have a large decrementer? */
+	if (!cpu_has_large_dec())
+		return;
+
+	/* do we need a large decrementer? */
+	if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
+		return;
+
+	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
+
+	if (!large_dec_enabled()) {
+		decrementer_max = DECREMENTER_DEFAULT_MAX;
+
+		pr_warn("time_init: Failed to enable large decrementer on CPU %d\n",
+			smp_processor_id());
+	}
+}
+
+static void __init set_decrementer_max(void)
+{
+	struct device_node *cpu;
+	const __be32 *fp;
+	u64 bits = 32;
+
+	cpu = of_find_node_by_type(NULL, "cpu");
+	if (cpu)
+		fp = of_get_property(cpu, "ibm,dec-bits", NULL);
+
+	if (cpu && fp) {
+		u64 dt_bits = of_read_number(fp, 1);
+
+		bits = dt_bits;
+		if (bits > 64)
+			bits = 64;
+		else if (bits < 32)
+			bits = 32;
+
+		WARN(bits != dt_bits, "firmware supplied invalid ibm,dec-bits");
+
+		/*
+		 * Firmware says we support large dec but this cpu doesn't we
+		 * should warn about it. We can still limp along with default
+		 * 32 bit dec, but something is broken.
+		 */
+		if (!cpu_has_large_dec()) {
+			WARN_ON(bits > 32);
+			bits = 32;
+		}
+
+		/* calculate the signed maximum given this many bits */
+		decrementer_max = (1ul << (bits - 1)) - 1;
+	}
+
+	pr_info("time_init: %llu bit decrementer (max: %llx)\n",
+		bits, decrementer_max);
+}
+
 static void __init init_decrementer_clockevent(void)
 {
 	int cpu = smp_processor_id();
@@ -898,7 +969,7 @@ static void __init init_decrementer_clockevent(void)
 	clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
 
 	decrementer_clockevent.max_delta_ns =
-		clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
+		clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
 	decrementer_clockevent.min_delta_ns =
 		clockevent_delta2ns(2, &decrementer_clockevent);
 
@@ -907,6 +978,9 @@ static void __init init_decrementer_clockevent(void)
 
 void secondary_cpu_time_init(void)
 {
+	/* Enable the large decrementer (if we need to) */
+	enable_large_decrementer();
+
 	/* Start the decrementer on CPUs that have manual control
 	 * such as BookE
 	 */
@@ -972,6 +1046,10 @@ void __init time_init(void)
 	vdso_data->tb_update_count = 0;
 	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
 
+	/* initialise and enable the large decrementer (if we have one) */
+	set_decrementer_max();
+	enable_large_decrementer();
+
 	/* Start the decrementer on CPUs that have manual control
 	 * such as BookE
 	 */
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 2/2] KVM: PPC: hypervisor large decrementer support
  2016-05-10  4:57 [PATCH v3 1/2] powerpc/timer - large decrementer support Oliver O'Halloran
@ 2016-05-10  4:57 ` Oliver O'Halloran
  2016-05-11  5:05 ` [PATCH v3 1/2] powerpc/timer - " Balbir Singh
  2016-05-31  6:25 ` Michael Neuling
  2 siblings, 0 replies; 16+ messages in thread
From: Oliver O'Halloran @ 2016-05-10  4:57 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran, Paul Mackerras, Michael Neuling

Power ISAv3 extends the width of the decrementer register from 32 bits.
The enlarged register width is implementation dependent, but reads from
these registers are automatically sign extended to produce a 64 bit
output when operating in large mode. The HDEC always operates in large
mode while the DEC register can be operated in 32bit mode or large mode
depending on the setting of the LPCR.LD bit.

Currently the hypervisor assumes that reads from the DEC and HDEC
register produce a 32 bit result which it sign extends to 64 bits using
the extsw instruction. This behaviour can result in the guest DEC
register value being corrupted by the hypervisor when the guest is
operating in LD mode since the results of the extsw instruction only
depends on the value of bit 31 in the register to be sign extended.

This patch adds the GET_DEC() and GET_HDEC() assembly macros for reading
from the decrementer registers. These macros will return the current
decrementer value as a 64 bit quantity regardless of the Host CPU or
guest decrementer operating mode. Additionally this patch corrects
several uses of decrementer values that assume a 32 bit register width.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/include/asm/exception-64s.h | 29 ++++++++++++++++++++++++
 arch/powerpc/include/asm/kvm_host.h      |  2 +-
 arch/powerpc/include/asm/kvm_ppc.h       |  2 +-
 arch/powerpc/include/uapi/asm/kvm.h      |  2 +-
 arch/powerpc/kernel/time.c               |  2 +-
 arch/powerpc/kvm/book3s_hv_interrupts.S  |  3 +--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 38 ++++++++++++++++++--------------
 arch/powerpc/kvm/emulate.c               |  6 ++---
 8 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 93ae809fe5ea..4fa303bf6d5b 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -545,4 +545,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 #define FINISH_NAP
 #endif
 
+/*
+ * On ISAv3 processors the DEC register can be extended from 32 bits to 64 by
+ * setting the LD flag the LPCR. The decrementer value is a signed quantity so
+ * sign exension is required when operating in 32 bit mode. The GET_DEC() and
+ * GET_HDEC() handle this sign extension and yield a 64 bit result independent
+ * of the LD mode.
+ *
+ * NB: It's possible run with LD mode disabled on ISAv3 so GET_DEC() does not
+ *     use a CPU_FEATURE section. A feature section is used for GET_HDEC because
+ *     it has no mode bit. It is always 64 bits for ISAv3 processors.
+ */
+
+#define IS_LD_ENABLED(reg)                 \
+	mfspr  reg,SPRN_LPCR;              \
+	andis. reg,reg,(LPCR_LD >> 16);
+
+#define GET_DEC(reg)                       \
+	IS_LD_ENABLED(reg);                \
+	mfspr reg, SPRN_DEC;               \
+	bne 99f;                           \
+	extsw reg, reg;                    \
+99:
+
+#define GET_HDEC(reg) \
+	mfspr reg, SPRN_HDEC;           \
+BEGIN_FTR_SECTION                       \
+	extsw reg, reg;                 \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
 #endif	/* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d7b343170453..6330d3fca083 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -516,7 +516,7 @@ struct kvm_vcpu_arch {
 	ulong mcsrr0;
 	ulong mcsrr1;
 	ulong mcsr;
-	u32 dec;
+	u64 dec;
 #ifdef CONFIG_BOOKE
 	u32 decar;
 #endif
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2544edabe7f3..4de0102930e9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -94,7 +94,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
 extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
-extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
+extern u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
 extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c93cf35ce379..2dd92e841127 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -215,7 +215,7 @@ struct kvm_sregs {
 			__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
 			__u32 tcr;
 			__u32 decar;
-			__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
+			__u64 dec;	/* KVM_SREGS_E_UPDATE_DEC */
 
 			/*
 			 * Userspace can read TB directly, but the
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 0656e80cadbf..a455f8523ae8 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -505,7 +505,7 @@ static void __timer_interrupt(void)
 	} else {
 		now = *next_tb - now;
 		if (now <= decrementer_max)
-			set_dec((int)now);
+			set_dec(now);
 		/* We may have raced with new irq work */
 		if (test_irq_work_pending())
 			set_dec(1);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..b408f72385e4 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
 	 */
-	mfspr	r8,SPRN_DEC
+	GET_DEC(r8)
 	mftb	r7
 	mtspr	SPRN_HDEC,r8
-	extsw	r8,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e571ad277398..718e5581494e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -183,6 +183,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+
+	/* XXX: ISA v3 only says the HDEC is at least as large as the DEC, but
+	 *      this code assumes we can fit HDEC in DEC. This is probably
+	 *      not an issue in practice, but... */
 	mfspr	r3, SPRN_HDEC
 	mtspr	SPRN_DEC, r3
 	/*
@@ -249,9 +253,9 @@ kvm_novcpu_wakeup:
 	bge	kvm_novcpu_exit
 
 	/* See if our timeslice has expired (HDEC is negative) */
-	mfspr	r0, SPRN_HDEC
+	GET_HDEC(r0);
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	cmpwi	r0, 0
+	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
 
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -340,8 +344,9 @@ kvm_secondary_got_guest:
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	lis	r6, 0x7fff
-	ori	r6, r6, 0xffff
+
+	LOAD_REG_ADDR(r6, decrementer_max);
+	ld	r6,0(r6);
 	mtspr	SPRN_HDEC, r6
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
@@ -897,7 +902,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mftb	r7
 	subf	r3,r7,r8
 	mtspr	SPRN_DEC,r3
-	stw	r3,VCPU_DEC(r4)
+	std	r3,VCPU_DEC(r4)
 
 	ld	r5, VCPU_SPRG0(r4)
 	ld	r6, VCPU_SPRG1(r4)
@@ -953,8 +958,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	isync
 
 	/* Check if HDEC expires soon */
-	mfspr	r3, SPRN_HDEC
-	cmpwi	r3, 512		/* 1 microsecond */
+	GET_HDEC(r3)
+	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
 	ld	r6, VCPU_CTR(r4)
@@ -990,8 +995,9 @@ deliver_guest_interrupt:
 	beq	5f
 	li	r0, BOOK3S_INTERRUPT_EXTERNAL
 	bne	cr1, 12f
-	mfspr	r0, SPRN_DEC
-	cmpwi	r0, 0
+
+	GET_DEC(r0)
+	cmpdi	r0, 0
 	li	r0, BOOK3S_INTERRUPT_DECREMENTER
 	bge	5f
 
@@ -1206,8 +1212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
-	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,0
+	GET_HDEC(r3);
+	cmpdi	r3,0
 	mr	r4,r9
 	bge	fast_guest_return
 2:
@@ -1326,9 +1332,8 @@ mc_cont:
 	mtspr	SPRN_SPURR,r4
 
 	/* Save DEC */
-	mfspr	r5,SPRN_DEC
+	GET_DEC(r5);
 	mftb	r6
-	extsw	r5,r5
 	add	r5,r5,r6
 	/* r5 is a guest timebase value here, convert to host TB */
 	ld	r3,HSTATE_KVM_VCORE(r13)
@@ -2250,15 +2255,14 @@ _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
 	 * no later than the end of our timeslice (HDEC interrupts
 	 * don't wake us from nap).
 	 */
-	mfspr	r3, SPRN_DEC
-	mfspr	r4, SPRN_HDEC
+	GET_DEC(r3);
+	GET_HDEC(r4);
 	mftb	r5
-	cmpw	r3, r4
+	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
 67:
 	/* save expiry time of guest decrementer */
-	extsw	r3, r3
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 5cc2e7af3a7b..5d065c04f2d5 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	unsigned long dec_nsec;
 	unsigned long long dec_time;
 
-	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+	pr_debug("mtDEC: %llx\n", vcpu->arch.dec);
 	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
 
 #ifdef CONFIG_PPC_BOOK3S
@@ -47,7 +47,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	kvmppc_core_dequeue_dec(vcpu);
 
 	/* POWER4+ triggers a dec interrupt if the value is < 0 */
-	if (vcpu->arch.dec & 0x80000000) {
+	if ((s64) vcpu->arch.dec < 0) {
 		kvmppc_core_queue_dec(vcpu);
 		return;
 	}
@@ -78,7 +78,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	vcpu->arch.dec_jiffies = get_tb();
 }
 
-u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
 {
 	u64 jd = tb - vcpu->arch.dec_jiffies;
 
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/2] powerpc/timer - large decrementer support
  2016-05-10  4:57 [PATCH v3 1/2] powerpc/timer - large decrementer support Oliver O'Halloran
  2016-05-10  4:57 ` [PATCH v3 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
@ 2016-05-11  5:05 ` Balbir Singh
  2016-05-31  6:25 ` Michael Neuling
  2 siblings, 0 replies; 16+ messages in thread
From: Balbir Singh @ 2016-05-11  5:05 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Michael Neuling, Jack Miller



On 10/05/16 14:57, Oliver O'Halloran wrote:
> POWER ISA v3 adds large decrementer (LD) mode of operation which increases
> the size of the decrementer register from 32 bits to an implementation
> defined with of up to 64 bits.
> 
> This patch adds support for the LD on processors with the CPU_FTR_ARCH_300
> cpu feature flag set. For CPUs with this feature LD mode is enabled when
> when the ibm,dec-bits devicetree property is supplied for the boot CPU. The
> decrementer value is a signed quantity (with negative values indicating a
> pending exception) and this property is required to find the maximum
> positive decrementer value. If this property is not supplied then the
> traditional decrementer width of 32 bits is assumed and LD mode is disabled.
> 
> This patch was based on initial work by Jack Miller.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> Cc: Michael Neuling <mikey@neuling.org>
> Cc: Balbir Singh <bsingharora@gmail.com>
> Cc: Jack Miller <jack@codezen.org>

These bits look good mostly

Reviewed-by: Balbir Singh <bsingharora@gmail.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/2] powerpc/timer - large decrementer support
  2016-05-10  4:57 [PATCH v3 1/2] powerpc/timer - large decrementer support Oliver O'Halloran
  2016-05-10  4:57 ` [PATCH v3 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
  2016-05-11  5:05 ` [PATCH v3 1/2] powerpc/timer - " Balbir Singh
@ 2016-05-31  6:25 ` Michael Neuling
  2016-05-31  7:05   ` oliver
  2016-05-31  7:16   ` [PATCH " Oliver O'Halloran
  2 siblings, 2 replies; 16+ messages in thread
From: Michael Neuling @ 2016-05-31  6:25 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Jack Miller, Michael Ellerman

On Tue, 2016-05-10 at 14:57 +1000, Oliver O'Halloran wrote:
> POWER ISA v3 adds large decrementer (LD) mode of operation which increase=
s
> the size of the decrementer register from 32 bits to an implementation
> defined with of up to 64 bits.
>=20
> This patch adds support for the LD on processors with the CPU_FTR_ARCH_30=
0
> cpu feature flag set. For CPUs with this feature LD mode is enabled when
> when the ibm,dec-bits devicetree property is supplied for the boot CPU. T=
he
> decrementer value is a signed quantity (with negative values indicating a
> pending exception) and this property is required to find the maximum
> positive decrementer value. If this property is not supplied then the
> traditional decrementer width of 32 bits is assumed and LD mode is disabl=
ed.
>=20
> This patch was based on initial work by Jack Miller.
>=20
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> Cc: Michael Neuling <mikey@neuling.org>
> Cc: Balbir Singh <bsingharora@gmail.com>
> Cc: Jack Miller <jack@codezen.org>

A couple or minor comments below. =C2=A0

I've tested this against v4.7-rc1 in the systemsim p9 model (not public
yet), with NO_HZ_FULL. From xmon I can see larger dec values being used=C2=
=A0
("Sr 16" in xmon).

If you fix the comments below I'm happy to ACK it.

Acked-by: Michael Neuling <mikey@neuling.org>


> ---
> =C2=A0arch/powerpc/include/asm/reg.h=C2=A0=C2=A0|=C2=A0=C2=A01 +
> =C2=A0arch/powerpc/include/asm/time.h |=C2=A0=C2=A06 +--
> =C2=A0arch/powerpc/kernel/time.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0| 92 =
+++++++++++++++++++++++++++++++++++++----
> =C2=A03 files changed, 89 insertions(+), 10 deletions(-)
>=20
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/re=
g.h
> index f5f4c66bbbc9..ff581ed1ab9d 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -332,6 +332,7 @@
> =C2=A0#define=C2=A0=C2=A0=C2=A0LPCR_AIL_0	0x00000000	/* MMU off exception=
 offset 0x0 */
> =C2=A0#define=C2=A0=C2=A0=C2=A0LPCR_AIL_3	0x01800000	/* MMU on exception =
offset 0xc00...4xxx */
> =C2=A0#define=C2=A0=C2=A0=C2=A0LPCR_ONL	0x00040000	/* online - PURR/SPURR=
 count */
> +#define=C2=A0=C2=A0=C2=A0LPCR_LD	0x00020000	/* large decremeter */
> =C2=A0#define=C2=A0=C2=A0=C2=A0LPCR_PECE	0x0001f000	/* powersave exit cau=
se enable */
> =C2=A0#define=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0LPCR_PECEDP	0x00010000	/* dire=
cted priv dbells cause exit */
> =C2=A0#define=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0LPCR_PECEDH	0x00008000	/* dire=
cted hyp dbells cause exit */
> diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/t=
ime.h
> index 1092fdd7e737..09211640a0e0 100644
> --- a/arch/powerpc/include/asm/time.h
> +++ b/arch/powerpc/include/asm/time.h
> @@ -146,7 +146,7 @@ static inline void set_tb(unsigned int upper, unsigne=
d int lower)
> =C2=A0 * in auto-reload mode.=C2=A0=C2=A0The problem is PIT stops countin=
g when it
> =C2=A0 * hits zero.=C2=A0=C2=A0If it would wrap, we could use it just lik=
e a decrementer.
> =C2=A0 */
> -static inline unsigned int get_dec(void)
> +static inline u64 get_dec(void)
> =C2=A0{
> =C2=A0#if defined(CONFIG_40x)
> =C2=A0	return (mfspr(SPRN_PIT));
> @@ -160,10 +160,10 @@ static inline unsigned int get_dec(void)
> =C2=A0 * in when the decrementer generates its interrupt: on the 1 to 0
> =C2=A0 * transition for Book E/4xx, but on the 0 to -1 transition for oth=
ers.
> =C2=A0 */
> -static inline void set_dec(int val)
> +static inline void set_dec(u64 val)
> =C2=A0{
> =C2=A0#if defined(CONFIG_40x)
> -	mtspr(SPRN_PIT, val);
> +	mtspr(SPRN_PIT, (u32) val);
> =C2=A0#else
> =C2=A0#ifndef CONFIG_BOOKE
> =C2=A0	--val;
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index 81b0900a39ee..0656e80cadbf 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -95,7 +95,8 @@ static struct clocksource clocksource_timebase =3D {
> =C2=A0	.read=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=3D tim=
ebase_read,
> =C2=A0};
> =C2=A0
> -#define DECREMENTER_MAX	0x7fffffff
> +#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
> +u64 decrementer_max =3D DECREMENTER_DEFAULT_MAX;
> =C2=A0
> =C2=A0static int decrementer_set_next_event(unsigned long evt,
> =C2=A0				=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0struct clock_event_device *=
dev);
> @@ -503,7 +504,7 @@ static void __timer_interrupt(void)
> =C2=A0		__this_cpu_inc(irq_stat.timer_irqs_event);
> =C2=A0	} else {
> =C2=A0		now =3D *next_tb - now;
> -		if (now <=3D DECREMENTER_MAX)
> +		if (now <=3D decrementer_max)
> =C2=A0			set_dec((int)now);

You can remove the int cast here now since set_dec() takes a u64.

> =C2=A0		/* We may have raced with new irq work */
> =C2=A0		if (test_irq_work_pending())
> @@ -534,7 +535,7 @@ void timer_interrupt(struct pt_regs * regs)
> =C2=A0	/* Ensure a positive value is written to the decrementer, or else
> =C2=A0	=C2=A0* some CPUs will continue to take decrementer exceptions.
> =C2=A0	=C2=A0*/
> -	set_dec(DECREMENTER_MAX);
> +	set_dec(decrementer_max);
> =C2=A0
> =C2=A0	/* Some implementations of hotplug will get timer interrupts while
> =C2=A0	=C2=A0* offline, just ignore these and we also need to set
> @@ -582,9 +583,9 @@ static void generic_suspend_disable_irqs(void)
> =C2=A0	=C2=A0* with suspending.
> =C2=A0	=C2=A0*/
> =C2=A0
> -	set_dec(DECREMENTER_MAX);
> +	set_dec(decrementer_max);
> =C2=A0	local_irq_disable();
> -	set_dec(DECREMENTER_MAX);
> +	set_dec(decrementer_max);
> =C2=A0}
> =C2=A0
> =C2=A0static void generic_suspend_enable_irqs(void)
> @@ -865,7 +866,7 @@ static int decrementer_set_next_event(unsigned long e=
vt,
> =C2=A0
> =C2=A0static int decrementer_shutdown(struct clock_event_device *dev)
> =C2=A0{
> -	decrementer_set_next_event(DECREMENTER_MAX, dev);
> +	decrementer_set_next_event(decrementer_max, dev);
> =C2=A0	return 0;
> =C2=A0}
> =C2=A0
> @@ -891,6 +892,76 @@ static void register_decrementer_clockevent(int cpu)
> =C2=A0	clockevents_register_device(dec);
> =C2=A0}
> =C2=A0
> +static inline bool cpu_has_large_dec(void)
> +{
> +	return cpu_has_feature(CPU_FTR_ARCH_300);
> +}
> +
> +static inline bool large_dec_enabled(void)
> +{
> +	return (mfspr(SPRN_LPCR) & LPCR_LD) =3D=3D LPCR_LD;
> +}
> +
> +/* enables the large decrementer for the current CPU */
> +static void enable_large_decrementer(void)
> +{
> +	/* do we have a large decrementer? */
> +	if (!cpu_has_large_dec())
> +		return;
> +
> +	/* do we need a large decrementer? */
> +	if (decrementer_max <=3D DECREMENTER_DEFAULT_MAX)
> +		return;
> +
> +	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
> +
> +	if (!large_dec_enabled()) {
> +		decrementer_max =3D DECREMENTER_DEFAULT_MAX;
> +
> +		pr_warn("time_init: Failed to enable large decrementer on CPU %d\n",
> +			smp_processor_id());

Can you make this pr_warn_once() since every CPU is going to call this?

> +	}
> +}
> +
> +static void __init set_decrementer_max(void)
> +{
> +	struct device_node *cpu;
> +	const __be32 *fp;
> +	u64 bits =3D 32;
> +
> +	cpu =3D of_find_node_by_type(NULL, "cpu");
> +	if (cpu)
> +		fp =3D of_get_property(cpu, "ibm,dec-bits", NULL);
> +
> +	if (cpu && fp) {
> +		u64 dt_bits =3D of_read_number(fp, 1);
> +
> +		bits =3D dt_bits;
> +		if (bits > 64)
> +			bits =3D 64;
> +		else if (bits < 32)
> +			bits =3D 32;
> +
> +		WARN(bits !=3D dt_bits, "firmware supplied invalid ibm,dec-bits");
> +
> +		/*
> +		=C2=A0* Firmware says we support large dec but this cpu doesn't we
> +		=C2=A0* should warn about it. We can still limp along with default
> +		=C2=A0* 32 bit dec, but something is broken.
> +		=C2=A0*/
> +		if (!cpu_has_large_dec()) {
> +			WARN_ON(bits > 32);
> +			bits =3D 32;
> +		}
> +
> +		/* calculate the signed maximum given this many bits */
> +		decrementer_max =3D (1ul << (bits - 1)) - 1;
> +	}
> +
> +	pr_info("time_init: %llu bit decrementer (max: %llx)\n",
> +		bits, decrementer_max);
> +}
> +
> =C2=A0static void __init init_decrementer_clockevent(void)
> =C2=A0{
> =C2=A0	int cpu =3D smp_processor_id();
> @@ -898,7 +969,7 @@ static void __init init_decrementer_clockevent(void)
> =C2=A0	clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, =
4);
> =C2=A0
> =C2=A0	decrementer_clockevent.max_delta_ns =3D
> -		clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
> +		clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
> =C2=A0	decrementer_clockevent.min_delta_ns =3D
> =C2=A0		clockevent_delta2ns(2, &decrementer_clockevent);
> =C2=A0
> @@ -907,6 +978,9 @@ static void __init init_decrementer_clockevent(void)
> =C2=A0
> =C2=A0void secondary_cpu_time_init(void)
> =C2=A0{
> +	/* Enable the large decrementer (if we need to) */
> +	enable_large_decrementer();
> +
> =C2=A0	/* Start the decrementer on CPUs that have manual control
> =C2=A0	=C2=A0* such as BookE
> =C2=A0	=C2=A0*/
> @@ -972,6 +1046,10 @@ void __init time_init(void)
> =C2=A0	vdso_data->tb_update_count =3D 0;
> =C2=A0	vdso_data->tb_ticks_per_sec =3D tb_ticks_per_sec;
> =C2=A0
> +	/* initialise and enable the large decrementer (if we have one) */
> +	set_decrementer_max();
> +	enable_large_decrementer();
> +
> =C2=A0	/* Start the decrementer on CPUs that have manual control
> =C2=A0	=C2=A0* such as BookE
> =C2=A0	=C2=A0*/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/2] powerpc/timer - large decrementer support
  2016-05-31  6:25 ` Michael Neuling
@ 2016-05-31  7:05   ` oliver
  2016-05-31  7:16   ` [PATCH " Oliver O'Halloran
  1 sibling, 0 replies; 16+ messages in thread
From: oliver @ 2016-05-31  7:05 UTC (permalink / raw)
  To: Michael Neuling; +Cc: linuxppc-dev, Jack Miller, Michael Ellerman

On Tue, May 31, 2016 at 4:25 PM, Michael Neuling <mikey@neuling.org> wrote:
> On Tue, 2016-05-10 at 14:57 +1000, Oliver O'Halloran wrote:
>>  static int decrementer_set_next_event(unsigned long evt,
>>                                     struct clock_event_device *dev);
>> @@ -503,7 +504,7 @@ static void __timer_interrupt(void)
>>               __this_cpu_inc(irq_stat.timer_irqs_event);
>>       } else {
>>               now = *next_tb - now;
>> -             if (now <= DECREMENTER_MAX)
>> +             if (now <= decrementer_max)
>>                       set_dec((int)now);
>
> You can remove the int cast here now since set_dec() takes a u64.

I thought I had fixed this, but it looks like I squashed the change to
into the second patch by accident, whoops.

>> +/* enables the large decrementer for the current CPU */
>> +static void enable_large_decrementer(void)
>> +{
>> +     /* do we have a large decrementer? */
>> +     if (!cpu_has_large_dec())
>> +             return;
>> +
>> +     /* do we need a large decrementer? */
>> +     if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
>> +             return;
>> +
>> +     mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
>> +
>> +     if (!large_dec_enabled()) {
>> +             decrementer_max = DECREMENTER_DEFAULT_MAX;
>> +
>> +             pr_warn("time_init: Failed to enable large decrementer on CPU %d\n",
>> +                     smp_processor_id());
>
> Can you make this pr_warn_once() since every CPU is going to call this?

Sure.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/2] powerpc/timer - large decrementer support
  2016-05-31  6:25 ` Michael Neuling
  2016-05-31  7:05   ` oliver
@ 2016-05-31  7:16   ` Oliver O'Halloran
  2016-05-31  7:16     ` [PATCH 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
  2016-06-01  5:28     ` [PATCH 1/2] powerpc/timer - " Michael Neuling
  1 sibling, 2 replies; 16+ messages in thread
From: Oliver O'Halloran @ 2016-05-31  7:16 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Oliver O'Halloran, Michael Neuling, Balbir Singh, Jack Miller

POWER ISA v3 adds large decrementer (LD) mode of operation which increases
the size of the decrementer register from 32 bits to an implementation
defined with of up to 64 bits.

This patch adds support for the LD on processors with the CPU_FTR_ARCH_300
cpu feature flag set. For CPUs with this feature LD mode is enabled when
when the ibm,dec-bits devicetree property is supplied for the boot CPU. The
decrementer value is a signed quantity (with negative values indicating a
pending exception) and this property is required to find the maximum
positive decrementer value. If this property is not supplied then the
traditional decrementer width of 32 bits is assumed and LD mode is disabled.

This patch was based on initial work by Jack Miller.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Jack Miller <jack@codezen.org>
---
 arch/powerpc/include/asm/reg.h  |  1 +
 arch/powerpc/include/asm/time.h |  6 +--
 arch/powerpc/kernel/time.c      | 94 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 90 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c1e82e968506..2793f3f03f9b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -332,6 +332,7 @@
 #define   LPCR_AIL_0	0x00000000	/* MMU off exception offset 0x0 */
 #define   LPCR_AIL_3	0x01800000	/* MMU on exception offset 0xc00...4xxx */
 #define   LPCR_ONL	0x00040000	/* online - PURR/SPURR count */
+#define   LPCR_LD	0x00020000	/* large decremeter */
 #define   LPCR_PECE	0x0001f000	/* powersave exit cause enable */
 #define     LPCR_PECEDP	0x00010000	/* directed priv dbells cause exit */
 #define     LPCR_PECEDH	0x00008000	/* directed hyp dbells cause exit */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 1092fdd7e737..09211640a0e0 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -146,7 +146,7 @@ static inline void set_tb(unsigned int upper, unsigned int lower)
  * in auto-reload mode.  The problem is PIT stops counting when it
  * hits zero.  If it would wrap, we could use it just like a decrementer.
  */
-static inline unsigned int get_dec(void)
+static inline u64 get_dec(void)
 {
 #if defined(CONFIG_40x)
 	return (mfspr(SPRN_PIT));
@@ -160,10 +160,10 @@ static inline unsigned int get_dec(void)
  * in when the decrementer generates its interrupt: on the 1 to 0
  * transition for Book E/4xx, but on the 0 to -1 transition for others.
  */
-static inline void set_dec(int val)
+static inline void set_dec(u64 val)
 {
 #if defined(CONFIG_40x)
-	mtspr(SPRN_PIT, val);
+	mtspr(SPRN_PIT, (u32) val);
 #else
 #ifndef CONFIG_BOOKE
 	--val;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 3ed9a5a21d77..fe66f1c8d8b2 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -96,7 +96,8 @@ static struct clocksource clocksource_timebase = {
 	.read         = timebase_read,
 };
 
-#define DECREMENTER_MAX	0x7fffffff
+#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
+u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
 
 static int decrementer_set_next_event(unsigned long evt,
 				      struct clock_event_device *dev);
@@ -504,8 +505,8 @@ static void __timer_interrupt(void)
 		__this_cpu_inc(irq_stat.timer_irqs_event);
 	} else {
 		now = *next_tb - now;
-		if (now <= DECREMENTER_MAX)
-			set_dec((int)now);
+		if (now <= decrementer_max)
+			set_dec(now);
 		/* We may have raced with new irq work */
 		if (test_irq_work_pending())
 			set_dec(1);
@@ -535,7 +536,7 @@ void timer_interrupt(struct pt_regs * regs)
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions.
 	 */
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 
 	/* Some implementations of hotplug will get timer interrupts while
 	 * offline, just ignore these and we also need to set
@@ -583,9 +584,9 @@ static void generic_suspend_disable_irqs(void)
 	 * with suspending.
 	 */
 
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 	local_irq_disable();
-	set_dec(DECREMENTER_MAX);
+	set_dec(decrementer_max);
 }
 
 static void generic_suspend_enable_irqs(void)
@@ -866,7 +867,7 @@ static int decrementer_set_next_event(unsigned long evt,
 
 static int decrementer_shutdown(struct clock_event_device *dev)
 {
-	decrementer_set_next_event(DECREMENTER_MAX, dev);
+	decrementer_set_next_event(decrementer_max, dev);
 	return 0;
 }
 
@@ -892,6 +893,76 @@ static void register_decrementer_clockevent(int cpu)
 	clockevents_register_device(dec);
 }
 
+static inline bool cpu_has_large_dec(void)
+{
+	return cpu_has_feature(CPU_FTR_ARCH_300);
+}
+
+static inline bool large_dec_enabled(void)
+{
+	return (mfspr(SPRN_LPCR) & LPCR_LD) == LPCR_LD;
+}
+
+/* enables the large decrementer for the current CPU */
+static void enable_large_decrementer(void)
+{
+	/* do we have a large decrementer? */
+	if (!cpu_has_large_dec())
+		return;
+
+	/* do we need a large decrementer? */
+	if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
+		return;
+
+	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
+
+	if (!large_dec_enabled()) {
+		decrementer_max = DECREMENTER_DEFAULT_MAX;
+
+		pr_warn_once("time_init: Failed to enable large decrementer on CPU %d\n",
+			smp_processor_id());
+	}
+}
+
+static void __init set_decrementer_max(void)
+{
+	struct device_node *cpu;
+	const __be32 *fp;
+	u64 bits = 32;
+
+	cpu = of_find_node_by_type(NULL, "cpu");
+	if (cpu)
+		fp = of_get_property(cpu, "ibm,dec-bits", NULL);
+
+	if (cpu && fp) {
+		u64 dt_bits = of_read_number(fp, 1);
+
+		bits = dt_bits;
+		if (bits > 64)
+			bits = 64;
+		else if (bits < 32)
+			bits = 32;
+
+		WARN(bits != dt_bits, "firmware supplied invalid ibm,dec-bits");
+
+		/*
+		 * Firmware says we support large dec but this cpu doesn't we
+		 * should warn about it. We can still limp along with default
+		 * 32 bit dec, but something is broken.
+		 */
+		if (!cpu_has_large_dec()) {
+			WARN_ON(bits > 32);
+			bits = 32;
+		}
+
+		/* calculate the signed maximum given this many bits */
+		decrementer_max = (1ul << (bits - 1)) - 1;
+	}
+
+	pr_info("time_init: %llu bit decrementer (max: %llx)\n",
+		bits, decrementer_max);
+}
+
 static void __init init_decrementer_clockevent(void)
 {
 	int cpu = smp_processor_id();
@@ -899,7 +970,7 @@ static void __init init_decrementer_clockevent(void)
 	clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
 
 	decrementer_clockevent.max_delta_ns =
-		clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
+		clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
 	decrementer_clockevent.min_delta_ns =
 		clockevent_delta2ns(2, &decrementer_clockevent);
 
@@ -908,6 +979,9 @@ static void __init init_decrementer_clockevent(void)
 
 void secondary_cpu_time_init(void)
 {
+	/* Enable the large decrementer (if we need to) */
+	enable_large_decrementer();
+
 	/* Start the decrementer on CPUs that have manual control
 	 * such as BookE
 	 */
@@ -973,6 +1047,10 @@ void __init time_init(void)
 	vdso_data->tb_update_count = 0;
 	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
 
+	/* initialise and enable the large decrementer (if we have one) */
+	set_decrementer_max();
+	enable_large_decrementer();
+
 	/* Start the decrementer on CPUs that have manual control
 	 * such as BookE
 	 */
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] KVM: PPC: hypervisor large decrementer support
  2016-05-31  7:16   ` [PATCH " Oliver O'Halloran
@ 2016-05-31  7:16     ` Oliver O'Halloran
  2016-06-01  6:23       ` Michael Neuling
  2016-06-01  5:28     ` [PATCH 1/2] powerpc/timer - " Michael Neuling
  1 sibling, 1 reply; 16+ messages in thread
From: Oliver O'Halloran @ 2016-05-31  7:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran, Paul Mackerras, Michael Neuling

Power ISAv3 extends the width of the decrementer register from 32 bits.
The enlarged register width is implementation dependent, but reads from
these registers are automatically sign extended to produce a 64 bit
output when operating in large mode. The HDEC always operates in large
mode while the DEC register can be operated in 32bit mode or large mode
depending on the setting of the LPCR.LD bit.

Currently the hypervisor assumes that reads from the DEC and HDEC
register produce a 32 bit result which it sign extends to 64 bits using
the extsw instruction. This behaviour can result in the guest DEC
register value being corrupted by the hypervisor when the guest is
operating in LD mode since the results of the extsw instruction only
depends on the value of bit 31 in the register to be sign extended.

This patch adds the GET_DEC() and GET_HDEC() assembly macros for reading
from the decrementer registers. These macros will return the current
decrementer value as a 64 bit quantity regardless of the Host CPU or
guest decrementer operating mode. Additionally this patch corrects
several uses of decrementer values that assume a 32 bit register width.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/include/asm/exception-64s.h | 29 ++++++++++++++++++++++++
 arch/powerpc/include/asm/kvm_host.h      |  2 +-
 arch/powerpc/include/asm/kvm_ppc.h       |  2 +-
 arch/powerpc/include/uapi/asm/kvm.h      |  2 +-
 arch/powerpc/kvm/book3s_hv_interrupts.S  |  3 +--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 38 ++++++++++++++++++--------------
 arch/powerpc/kvm/emulate.c               |  6 ++---
 7 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 93ae809fe5ea..4fa303bf6d5b 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -545,4 +545,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 #define FINISH_NAP
 #endif
 
+/*
+ * On ISAv3 processors the DEC register can be extended from 32 bits to 64 by
+ * setting the LD flag the LPCR. The decrementer value is a signed quantity so
+ * sign exension is required when operating in 32 bit mode. The GET_DEC() and
+ * GET_HDEC() handle this sign extension and yield a 64 bit result independent
+ * of the LD mode.
+ *
+ * NB: It's possible run with LD mode disabled on ISAv3 so GET_DEC() does not
+ *     use a CPU_FEATURE section. A feature section is used for GET_HDEC because
+ *     it has no mode bit. It is always 64 bits for ISAv3 processors.
+ */
+
+#define IS_LD_ENABLED(reg)                 \
+	mfspr  reg,SPRN_LPCR;              \
+	andis. reg,reg,(LPCR_LD >> 16);
+
+#define GET_DEC(reg)                       \
+	IS_LD_ENABLED(reg);                \
+	mfspr reg, SPRN_DEC;               \
+	bne 99f;                           \
+	extsw reg, reg;                    \
+99:
+
+#define GET_HDEC(reg) \
+	mfspr reg, SPRN_HDEC;           \
+BEGIN_FTR_SECTION                       \
+	extsw reg, reg;                 \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
 #endif	/* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index ec35af34a3fb..ddea233e2cce 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -520,7 +520,7 @@ struct kvm_vcpu_arch {
 	ulong mcsrr0;
 	ulong mcsrr1;
 	ulong mcsr;
-	u32 dec;
+	u64 dec;
 #ifdef CONFIG_BOOKE
 	u32 decar;
 #endif
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2544edabe7f3..4de0102930e9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -94,7 +94,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
 extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
-extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
+extern u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
 extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c93cf35ce379..2dd92e841127 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -215,7 +215,7 @@ struct kvm_sregs {
 			__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
 			__u32 tcr;
 			__u32 decar;
-			__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
+			__u64 dec;	/* KVM_SREGS_E_UPDATE_DEC */
 
 			/*
 			 * Userspace can read TB directly, but the
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..b408f72385e4 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
 	 */
-	mfspr	r8,SPRN_DEC
+	GET_DEC(r8)
 	mftb	r7
 	mtspr	SPRN_HDEC,r8
-	extsw	r8,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e571ad277398..718e5581494e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -183,6 +183,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+
+	/* XXX: ISA v3 only says the HDEC is at least as large as the DEC, but
+	 *      this code assumes we can fit HDEC in DEC. This is probably
+	 *      not an issue in practice, but... */
 	mfspr	r3, SPRN_HDEC
 	mtspr	SPRN_DEC, r3
 	/*
@@ -249,9 +253,9 @@ kvm_novcpu_wakeup:
 	bge	kvm_novcpu_exit
 
 	/* See if our timeslice has expired (HDEC is negative) */
-	mfspr	r0, SPRN_HDEC
+	GET_HDEC(r0);
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	cmpwi	r0, 0
+	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
 
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -340,8 +344,9 @@ kvm_secondary_got_guest:
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	lis	r6, 0x7fff
-	ori	r6, r6, 0xffff
+
+	LOAD_REG_ADDR(r6, decrementer_max);
+	ld	r6,0(r6);
 	mtspr	SPRN_HDEC, r6
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
@@ -897,7 +902,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mftb	r7
 	subf	r3,r7,r8
 	mtspr	SPRN_DEC,r3
-	stw	r3,VCPU_DEC(r4)
+	std	r3,VCPU_DEC(r4)
 
 	ld	r5, VCPU_SPRG0(r4)
 	ld	r6, VCPU_SPRG1(r4)
@@ -953,8 +958,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	isync
 
 	/* Check if HDEC expires soon */
-	mfspr	r3, SPRN_HDEC
-	cmpwi	r3, 512		/* 1 microsecond */
+	GET_HDEC(r3)
+	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
 	ld	r6, VCPU_CTR(r4)
@@ -990,8 +995,9 @@ deliver_guest_interrupt:
 	beq	5f
 	li	r0, BOOK3S_INTERRUPT_EXTERNAL
 	bne	cr1, 12f
-	mfspr	r0, SPRN_DEC
-	cmpwi	r0, 0
+
+	GET_DEC(r0)
+	cmpdi	r0, 0
 	li	r0, BOOK3S_INTERRUPT_DECREMENTER
 	bge	5f
 
@@ -1206,8 +1212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
-	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,0
+	GET_HDEC(r3);
+	cmpdi	r3,0
 	mr	r4,r9
 	bge	fast_guest_return
 2:
@@ -1326,9 +1332,8 @@ mc_cont:
 	mtspr	SPRN_SPURR,r4
 
 	/* Save DEC */
-	mfspr	r5,SPRN_DEC
+	GET_DEC(r5);
 	mftb	r6
-	extsw	r5,r5
 	add	r5,r5,r6
 	/* r5 is a guest timebase value here, convert to host TB */
 	ld	r3,HSTATE_KVM_VCORE(r13)
@@ -2250,15 +2255,14 @@ _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
 	 * no later than the end of our timeslice (HDEC interrupts
 	 * don't wake us from nap).
 	 */
-	mfspr	r3, SPRN_DEC
-	mfspr	r4, SPRN_HDEC
+	GET_DEC(r3);
+	GET_HDEC(r4);
 	mftb	r5
-	cmpw	r3, r4
+	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
 67:
 	/* save expiry time of guest decrementer */
-	extsw	r3, r3
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 5cc2e7af3a7b..5d065c04f2d5 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	unsigned long dec_nsec;
 	unsigned long long dec_time;
 
-	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+	pr_debug("mtDEC: %llx\n", vcpu->arch.dec);
 	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
 
 #ifdef CONFIG_PPC_BOOK3S
@@ -47,7 +47,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	kvmppc_core_dequeue_dec(vcpu);
 
 	/* POWER4+ triggers a dec interrupt if the value is < 0 */
-	if (vcpu->arch.dec & 0x80000000) {
+	if ((s64) vcpu->arch.dec < 0) {
 		kvmppc_core_queue_dec(vcpu);
 		return;
 	}
@@ -78,7 +78,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	vcpu->arch.dec_jiffies = get_tb();
 }
 
-u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
 {
 	u64 jd = tb - vcpu->arch.dec_jiffies;
 
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] powerpc/timer - large decrementer support
  2016-05-31  7:16   ` [PATCH " Oliver O'Halloran
  2016-05-31  7:16     ` [PATCH 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
@ 2016-06-01  5:28     ` Michael Neuling
  2016-06-03  2:01       ` Balbir Singh
  1 sibling, 1 reply; 16+ messages in thread
From: Michael Neuling @ 2016-06-01  5:28 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Balbir Singh, Jack Miller

On Tue, 2016-05-31 at 17:16 +1000, Oliver O'Halloran wrote:
> POWER ISA v3 adds large decrementer (LD) mode of operation which
> increases
> the size of the decrementer register from 32 bits to an implementation
> defined with of up to 64 bits.
>=20
> This patch adds support for the LD on processors with the
> CPU_FTR_ARCH_300
> cpu feature flag set. For CPUs with this feature LD mode is enabled when
> when the ibm,dec-bits devicetree property is supplied for the boot CPU.
> The
> decrementer value is a signed quantity (with negative values indicating a
> pending exception) and this property is required to find the maximum
> positive decrementer value. If this property is not supplied then the
> traditional decrementer width of 32 bits is assumed and LD mode is
> disabled.
>=20
> This patch was based on initial work by Jack Miller.
>=20
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> Cc: Michael Neuling <mikey@neuling.org>

Acked-by: Michael Neuling <mikey@neuling.org>

> Cc: Balbir Singh <bsingharora@gmail.com>
> Cc: Jack Miller <jack@codezen.org>
> ---
> =C2=A0arch/powerpc/include/asm/reg.h=C2=A0=C2=A0|=C2=A0=C2=A01 +
> =C2=A0arch/powerpc/include/asm/time.h |=C2=A0=C2=A06 +--
> =C2=A0arch/powerpc/kernel/time.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0| 94
> +++++++++++++++++++++++++++++++++++++----
> =C2=A03 files changed, 90 insertions(+), 11 deletions(-)
>=20
> diff --git a/arch/powerpc/include/asm/reg.h
> b/arch/powerpc/include/asm/reg.h
> index c1e82e968506..2793f3f03f9b 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -332,6 +332,7 @@
> =C2=A0#define=C2=A0=C2=A0=C2=A0LPCR_AIL_0	0x00000000	/* MMU off exception
> offset 0x0 */
> =C2=A0#define=C2=A0=C2=A0=C2=A0LPCR_AIL_3	0x01800000	/* MMU on exception =
offset
> 0xc00...4xxx */
> =C2=A0#define=C2=A0=C2=A0=C2=A0LPCR_ONL	0x00040000	/* online - PURR/SPURR=
 count
> */
> +#define=C2=A0=C2=A0=C2=A0LPCR_LD	0x00020000	/* large decremeter */
> =C2=A0#define=C2=A0=C2=A0=C2=A0LPCR_PECE	0x0001f000	/* powersave exit cau=
se
> enable */
> =C2=A0#define=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0LPCR_PECEDP	0x00010000	/* dire=
cted priv dbells
> cause exit */
> =C2=A0#define=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0LPCR_PECEDH	0x00008000	/* dire=
cted hyp dbells
> cause exit */
> diff --git a/arch/powerpc/include/asm/time.h
> b/arch/powerpc/include/asm/time.h
> index 1092fdd7e737..09211640a0e0 100644
> --- a/arch/powerpc/include/asm/time.h
> +++ b/arch/powerpc/include/asm/time.h
> @@ -146,7 +146,7 @@ static inline void set_tb(unsigned int upper,
> unsigned int lower)
> =C2=A0 * in auto-reload mode.=C2=A0=C2=A0The problem is PIT stops countin=
g when it
> =C2=A0 * hits zero.=C2=A0=C2=A0If it would wrap, we could use it just lik=
e a
> decrementer.
> =C2=A0 */
> -static inline unsigned int get_dec(void)
> +static inline u64 get_dec(void)
> =C2=A0{
> =C2=A0#if defined(CONFIG_40x)
> =C2=A0	return (mfspr(SPRN_PIT));
> @@ -160,10 +160,10 @@ static inline unsigned int get_dec(void)
> =C2=A0 * in when the decrementer generates its interrupt: on the 1 to 0
> =C2=A0 * transition for Book E/4xx, but on the 0 to -1 transition for oth=
ers.
> =C2=A0 */
> -static inline void set_dec(int val)
> +static inline void set_dec(u64 val)
> =C2=A0{
> =C2=A0#if defined(CONFIG_40x)
> -	mtspr(SPRN_PIT, val);
> +	mtspr(SPRN_PIT, (u32) val);
> =C2=A0#else
> =C2=A0#ifndef CONFIG_BOOKE
> =C2=A0	--val;
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index 3ed9a5a21d77..fe66f1c8d8b2 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -96,7 +96,8 @@ static struct clocksource clocksource_timebase =3D {
> =C2=A0	.read=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=3D tim=
ebase_read,
> =C2=A0};
> =C2=A0
> -#define DECREMENTER_MAX	0x7fffffff
> +#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
> +u64 decrementer_max =3D DECREMENTER_DEFAULT_MAX;
> =C2=A0
> =C2=A0static int decrementer_set_next_event(unsigned long evt,
> =C2=A0				=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0struct clock_event_device *=
dev);
> @@ -504,8 +505,8 @@ static void __timer_interrupt(void)
> =C2=A0		__this_cpu_inc(irq_stat.timer_irqs_event);
> =C2=A0	} else {
> =C2=A0		now =3D *next_tb - now;
> -		if (now <=3D DECREMENTER_MAX)
> -			set_dec((int)now);
> +		if (now <=3D decrementer_max)
> +			set_dec(now);
> =C2=A0		/* We may have raced with new irq work */
> =C2=A0		if (test_irq_work_pending())
> =C2=A0			set_dec(1);
> @@ -535,7 +536,7 @@ void timer_interrupt(struct pt_regs * regs)
> =C2=A0	/* Ensure a positive value is written to the decrementer, or
> else
> =C2=A0	=C2=A0* some CPUs will continue to take decrementer exceptions.
> =C2=A0	=C2=A0*/
> -	set_dec(DECREMENTER_MAX);
> +	set_dec(decrementer_max);
> =C2=A0
> =C2=A0	/* Some implementations of hotplug will get timer interrupts
> while
> =C2=A0	=C2=A0* offline, just ignore these and we also need to set
> @@ -583,9 +584,9 @@ static void generic_suspend_disable_irqs(void)
> =C2=A0	=C2=A0* with suspending.
> =C2=A0	=C2=A0*/
> =C2=A0
> -	set_dec(DECREMENTER_MAX);
> +	set_dec(decrementer_max);
> =C2=A0	local_irq_disable();
> -	set_dec(DECREMENTER_MAX);
> +	set_dec(decrementer_max);
> =C2=A0}
> =C2=A0
> =C2=A0static void generic_suspend_enable_irqs(void)
> @@ -866,7 +867,7 @@ static int decrementer_set_next_event(unsigned long
> evt,
> =C2=A0
> =C2=A0static int decrementer_shutdown(struct clock_event_device *dev)
> =C2=A0{
> -	decrementer_set_next_event(DECREMENTER_MAX, dev);
> +	decrementer_set_next_event(decrementer_max, dev);
> =C2=A0	return 0;
> =C2=A0}
> =C2=A0
> @@ -892,6 +893,76 @@ static void register_decrementer_clockevent(int cpu)
> =C2=A0	clockevents_register_device(dec);
> =C2=A0}
> =C2=A0
> +static inline bool cpu_has_large_dec(void)
> +{
> +	return cpu_has_feature(CPU_FTR_ARCH_300);
> +}
> +
> +static inline bool large_dec_enabled(void)
> +{
> +	return (mfspr(SPRN_LPCR) & LPCR_LD) =3D=3D LPCR_LD;
> +}
> +
> +/* enables the large decrementer for the current CPU */
> +static void enable_large_decrementer(void)
> +{
> +	/* do we have a large decrementer? */
> +	if (!cpu_has_large_dec())
> +		return;
> +
> +	/* do we need a large decrementer? */
> +	if (decrementer_max <=3D DECREMENTER_DEFAULT_MAX)
> +		return;
> +
> +	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
> +
> +	if (!large_dec_enabled()) {
> +		decrementer_max =3D DECREMENTER_DEFAULT_MAX;
> +
> +		pr_warn_once("time_init: Failed to enable large
> decrementer on CPU %d\n",
> +			smp_processor_id());
> +	}
> +}
> +
> +static void __init set_decrementer_max(void)
> +{
> +	struct device_node *cpu;
> +	const __be32 *fp;
> +	u64 bits =3D 32;
> +
> +	cpu =3D of_find_node_by_type(NULL, "cpu");
> +	if (cpu)
> +		fp =3D of_get_property(cpu, "ibm,dec-bits", NULL);
> +
> +	if (cpu && fp) {
> +		u64 dt_bits =3D of_read_number(fp, 1);
> +
> +		bits =3D dt_bits;
> +		if (bits > 64)
> +			bits =3D 64;
> +		else if (bits < 32)
> +			bits =3D 32;
> +
> +		WARN(bits !=3D dt_bits, "firmware supplied invalid
> ibm,dec-bits");
> +
> +		/*
> +		=C2=A0* Firmware says we support large dec but this cpu
> doesn't we
> +		=C2=A0* should warn about it. We can still limp along with
> default
> +		=C2=A0* 32 bit dec, but something is broken.
> +		=C2=A0*/
> +		if (!cpu_has_large_dec()) {
> +			WARN_ON(bits > 32);
> +			bits =3D 32;
> +		}
> +
> +		/* calculate the signed maximum given this many bits */
> +		decrementer_max =3D (1ul << (bits - 1)) - 1;
> +	}
> +
> +	pr_info("time_init: %llu bit decrementer (max: %llx)\n",
> +		bits, decrementer_max);
> +}
> +
> =C2=A0static void __init init_decrementer_clockevent(void)
> =C2=A0{
> =C2=A0	int cpu =3D smp_processor_id();
> @@ -899,7 +970,7 @@ static void __init init_decrementer_clockevent(void)
> =C2=A0	clockevents_calc_mult_shift(&decrementer_clockevent,
> ppc_tb_freq, 4);
> =C2=A0
> =C2=A0	decrementer_clockevent.max_delta_ns =3D
> -		clockevent_delta2ns(DECREMENTER_MAX,
> &decrementer_clockevent);
> +		clockevent_delta2ns(decrementer_max,
> &decrementer_clockevent);
> =C2=A0	decrementer_clockevent.min_delta_ns =3D
> =C2=A0		clockevent_delta2ns(2, &decrementer_clockevent);
> =C2=A0
> @@ -908,6 +979,9 @@ static void __init init_decrementer_clockevent(void)
> =C2=A0
> =C2=A0void secondary_cpu_time_init(void)
> =C2=A0{
> +	/* Enable the large decrementer (if we need to) */
> +	enable_large_decrementer();
> +
> =C2=A0	/* Start the decrementer on CPUs that have manual control
> =C2=A0	=C2=A0* such as BookE
> =C2=A0	=C2=A0*/
> @@ -973,6 +1047,10 @@ void __init time_init(void)
> =C2=A0	vdso_data->tb_update_count =3D 0;
> =C2=A0	vdso_data->tb_ticks_per_sec =3D tb_ticks_per_sec;
> =C2=A0
> +	/* initialise and enable the large decrementer (if we have one)
> */
> +	set_decrementer_max();
> +	enable_large_decrementer();
> +
> =C2=A0	/* Start the decrementer on CPUs that have manual control
> =C2=A0	=C2=A0* such as BookE
> =C2=A0	=C2=A0*/

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: PPC: hypervisor large decrementer support
  2016-05-31  7:16     ` [PATCH 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
@ 2016-06-01  6:23       ` Michael Neuling
  2016-06-02 21:46         ` Benjamin Herrenschmidt
  2016-06-22  7:30         ` oliver
  0 siblings, 2 replies; 16+ messages in thread
From: Michael Neuling @ 2016-06-01  6:23 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Paul Mackerras

On Tue, 2016-05-31 at 17:16 +1000, Oliver O'Halloran wrote:
> Power ISAv3 extends the width of the decrementer register from 32 bits.
> The enlarged register width is implementation dependent, but reads from
> these registers are automatically sign extended to produce a 64 bit
> output when operating in large mode. The HDEC always operates in large
> mode while the DEC register can be operated in 32bit mode or large mode
> depending on the setting of the LPCR.LD bit.
>=20
> Currently the hypervisor assumes that reads from the DEC and HDEC
> register produce a 32 bit result which it sign extends to 64 bits using
> the extsw instruction. This behaviour can result in the guest DEC
> register value being corrupted by the hypervisor when the guest is
> operating in LD mode since the results of the extsw instruction only
> depends on the value of bit 31 in the register to be sign extended.
>=20
> This patch adds the GET_DEC() and GET_HDEC() assembly macros for reading
> from the decrementer registers. These macros will return the current
> decrementer value as a 64 bit quantity regardless of=C2=A0the Host CPU or
> guest decrementer operating mode. Additionally this patch corrects
> several uses of decrementer values that assume a 32 bit register width.
>=20
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Michael Neuling <mikey@neuling.org>
> ---
> =C2=A0arch/powerpc/include/asm/exception-64s.h | 29 +++++++++++++++++++++=
+++
> =C2=A0arch/powerpc/include/asm/kvm_host.h=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=
=C2=A0|=C2=A0=C2=A02 +-
> =C2=A0arch/powerpc/include/asm/kvm_ppc.h=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0|=C2=A0=C2=A02 +-
> =C2=A0arch/powerpc/include/uapi/asm/kvm.h=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=
=C2=A0|=C2=A0=C2=A02 +-
> =C2=A0arch/powerpc/kvm/book3s_hv_interrupts.S=C2=A0=C2=A0|=C2=A0=C2=A03 +=
--
> =C2=A0arch/powerpc/kvm/book3s_hv_rmhandlers.S=C2=A0=C2=A0| 38 +++++++++++=
+++++++--------------
> =C2=A0arch/powerpc/kvm/emulate.c=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0|=C2=A0=C2=A06 ++---
> =C2=A07 files changed, 57 insertions(+), 25 deletions(-)
>=20
> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/incl=
ude/asm/exception-64s.h
> index 93ae809fe5ea..4fa303bf6d5b 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -545,4 +545,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
> =C2=A0#define FINISH_NAP
> =C2=A0#endif
> =C2=A0
> +/*
> + * On ISAv3 processors the DEC register can be extended from 32 bits to =
64 by
> + * setting the LD flag the LPCR. The decrementer value is a signed quant=
ity so
> + * sign exension is required when operating in 32 bit mode. The GET_DEC(=
) and
> + * GET_HDEC() handle this sign extension and yield a 64 bit result indep=
endent
> + * of the LD mode.
> + *
> + * NB: It's possible run with LD mode disabled on ISAv3 so GET_DEC() doe=
s not
> + *=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0use a CPU_FEATURE section. A feature sec=
tion is used for GET_HDEC because
> + *=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0it has no mode bit. It is always 64 bits=
 for ISAv3 processors.
> + */
> +
> +#define IS_LD_ENABLED(reg)=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\
> +	mfspr=C2=A0=C2=A0reg,SPRN_LPCR;=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\
> +	andis. reg,reg,(LPCR_LD >> 16);

FWIW you can use:
=C2=A0 =C2=A0 =C2=A0 =C2=A0 andis. reg,reg,(LPCR_LD)@ha

> +#define GET_DEC(reg)=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=
=C2=A0=C2=A0\
> +	IS_LD_ENABLED(reg);=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\
> +	mfspr reg, SPRN_DEC;=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\
> +	bne 99f;=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=
=C2=A0=C2=A0=C2=A0=C2=A0\
> +	extsw reg, reg;=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=
=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\
> +99:

It's a little painful that GET_DEC() is now 2 SPR moves. =C2=A0SPR moves ca=
n be
a bit expensive. =C2=A0Probably ok for now, but might be nice to store the =
guest
dec LD state somewhere so we don't have to retrieve it from the LPCR SPR.

Actually, it's probably best to do this now since checking the LD bit in
the LPCR on P8 is a bit dodgy and unnecessary. There is a kvm->arch.lpcr
you might be able use for this and you can add an asm-offsets for it too
(like KVM_LPID).

Is GET_DEC ever run in HV=3D0, where the guest couldn't read the LPCR?

Also, this now trashes cr0... have you checked that's ok in the paths it's
used?

> +
> +#define GET_HDEC(reg) \
> +	mfspr reg, SPRN_HDEC;=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=
=C2=A0=C2=A0=C2=A0\
> +BEGIN_FTR_SECTION=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=
=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=
=A0=C2=A0\
> +	extsw reg, reg;=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=
=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0\
> +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> +
> =C2=A0#endif	/* _ASM_POWERPC_EXCEPTION_H */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/a=
sm/kvm_host.h
> index ec35af34a3fb..ddea233e2cce 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -520,7 +520,7 @@ struct kvm_vcpu_arch {
> =C2=A0	ulong mcsrr0;
> =C2=A0	ulong mcsrr1;
> =C2=A0	ulong mcsr;
> -	u32 dec;
> +	u64 dec;
> =C2=A0#ifdef CONFIG_BOOKE
> =C2=A0	u32 decar;
> =C2=A0#endif
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/as=
m/kvm_ppc.h
> index 2544edabe7f3..4de0102930e9 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -94,7 +94,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *r=
un,
> =C2=A0extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
> =C2=A0extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu=
 *vcpu);
> =C2=A0extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> -extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> +extern u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> =C2=A0extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
> =C2=A0extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
> =C2=A0extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/u=
api/asm/kvm.h
> index c93cf35ce379..2dd92e841127 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -215,7 +215,7 @@ struct kvm_sregs {
> =C2=A0			__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
> =C2=A0			__u32 tcr;
> =C2=A0			__u32 decar;
> -			__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
> +			__u64 dec;	/* KVM_SREGS_E_UPDATE_DEC */
> =C2=A0
> =C2=A0			/*
> =C2=A0			=C2=A0* Userspace can read TB directly, but the
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/b=
ook3s_hv_interrupts.S
> index 0fdc4a28970b..b408f72385e4 100644
> --- a/arch/powerpc/kvm/book3s_hv_interrupts.S
> +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
> @@ -121,10 +121,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> =C2=A0	=C2=A0* Put whatever is in the decrementer into the
> =C2=A0	=C2=A0* hypervisor decrementer.
> =C2=A0	=C2=A0*/
> -	mfspr	r8,SPRN_DEC
> +	GET_DEC(r8)
> =C2=A0	mftb	r7
> =C2=A0	mtspr	SPRN_HDEC,r8
> -	extsw	r8,r8
> =C2=A0	add	r8,r8,r7
> =C2=A0	std	r8,HSTATE_DECEXP(r13)
> =C2=A0
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/b=
ook3s_hv_rmhandlers.S
> index e571ad277398..718e5581494e 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -183,6 +183,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> =C2=A0kvmppc_primary_no_guest:
> =C2=A0	/* We handle this much like a ceded vcpu */
> =C2=A0	/* put the HDEC into the DEC, since HDEC interrupts don't wake us =
*/
> +
> +	/* XXX: ISA v3 only says the HDEC is at least as large as the DEC, but
> +	=C2=A0*=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0this code assumes we can fit=
 HDEC in DEC. This is probably
> +	=C2=A0*=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0=C2=A0not an issue in practice, bu=
t... */
> =C2=A0	mfspr	r3, SPRN_HDEC
> =C2=A0	mtspr	SPRN_DEC, r3
> =C2=A0	/*
> @@ -249,9 +253,9 @@ kvm_novcpu_wakeup:
> =C2=A0	bge	kvm_novcpu_exit
> =C2=A0
> =C2=A0	/* See if our timeslice has expired (HDEC is negative) */
> -	mfspr	r0, SPRN_HDEC
> +	GET_HDEC(r0);
> =C2=A0	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
> -	cmpwi	r0, 0
> +	cmpdi	r0, 0
> =C2=A0	blt	kvm_novcpu_exit
> =C2=A0
> =C2=A0	/* Got an IPI but other vcpus aren't yet exiting, must be a lateco=
mer */
> @@ -340,8 +344,9 @@ kvm_secondary_got_guest:
> =C2=A0	lbz	r4, HSTATE_PTID(r13)
> =C2=A0	cmpwi	r4, 0
> =C2=A0	bne	63f
> -	lis	r6, 0x7fff
> -	ori	r6, r6, 0xffff
> +
> +	LOAD_REG_ADDR(r6, decrementer_max);
> +	ld	r6,0(r6);
> =C2=A0	mtspr	SPRN_HDEC, r6
> =C2=A0	/* and set per-LPAR registers, if doing dynamic micro-threading */
> =C2=A0	ld	r6, HSTATE_SPLIT_MODE(r13)
> @@ -897,7 +902,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> =C2=A0	mftb	r7
> =C2=A0	subf	r3,r7,r8
> =C2=A0	mtspr	SPRN_DEC,r3
> -	stw	r3,VCPU_DEC(r4)
> +	std	r3,VCPU_DEC(r4)
> =C2=A0
> =C2=A0	ld	r5, VCPU_SPRG0(r4)
> =C2=A0	ld	r6, VCPU_SPRG1(r4)
> @@ -953,8 +958,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> =C2=A0	isync
> =C2=A0
> =C2=A0	/* Check if HDEC expires soon */
> -	mfspr	r3, SPRN_HDEC
> -	cmpwi	r3, 512		/* 1 microsecond */
> +	GET_HDEC(r3)
> +	cmpdi	r3, 512		/* 1 microsecond */
> =C2=A0	blt	hdec_soon
> =C2=A0
> =C2=A0	ld	r6, VCPU_CTR(r4)
> @@ -990,8 +995,9 @@ deliver_guest_interrupt:
> =C2=A0	beq	5f
> =C2=A0	li	r0, BOOK3S_INTERRUPT_EXTERNAL
> =C2=A0	bne	cr1, 12f
> -	mfspr	r0, SPRN_DEC
> -	cmpwi	r0, 0
> +
> +	GET_DEC(r0)
> +	cmpdi	r0, 0

We could just use mfspr DEC here since we are just comparing to 0. =C2=A0It
should work in any mode.

> =C2=A0	li	r0, BOOK3S_INTERRUPT_DECREMENTER
> =C2=A0	bge	5f
> =C2=A0
> @@ -1206,8 +1212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> =C2=A0	/* See if this is a leftover HDEC interrupt */
> =C2=A0	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
> =C2=A0	bne	2f
> -	mfspr	r3,SPRN_HDEC
> -	cmpwi	r3,0
> +	GET_HDEC(r3);
> +	cmpdi	r3,0
> =C2=A0	mr	r4,r9
> =C2=A0	bge	fast_guest_return
> =C2=A02:
> @@ -1326,9 +1332,8 @@ mc_cont:
> =C2=A0	mtspr	SPRN_SPURR,r4
> =C2=A0
> =C2=A0	/* Save DEC */
> -	mfspr	r5,SPRN_DEC
> +	GET_DEC(r5);
> =C2=A0	mftb	r6
> -	extsw	r5,r5
> =C2=A0	add	r5,r5,r6
> =C2=A0	/* r5 is a guest timebase value here, convert to host TB */
> =C2=A0	ld	r3,HSTATE_KVM_VCORE(r13)
> @@ -2250,15 +2255,14 @@ _GLOBAL(kvmppc_h_cede)		/* r3 =3D vcpu pointer, r=
11 =3D msr, r13 =3D paca */
> =C2=A0	=C2=A0* no later than the end of our timeslice (HDEC interrupts
> =C2=A0	=C2=A0* don't wake us from nap).
> =C2=A0	=C2=A0*/
> -	mfspr	r3, SPRN_DEC
> -	mfspr	r4, SPRN_HDEC
> +	GET_DEC(r3);
> +	GET_HDEC(r4);
> =C2=A0	mftb	r5
> -	cmpw	r3, r4
> +	cmpd	r3, r4
> =C2=A0	ble	67f
> =C2=A0	mtspr	SPRN_DEC, r4
> =C2=A067:
> =C2=A0	/* save expiry time of guest decrementer */
> -	extsw	r3, r3
> =C2=A0	add	r3, r3, r5
> =C2=A0	ld	r4, HSTATE_KVM_VCPU(r13)
> =C2=A0	ld	r5, HSTATE_KVM_VCORE(r13)
> diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
> index 5cc2e7af3a7b..5d065c04f2d5 100644
> --- a/arch/powerpc/kvm/emulate.c
> +++ b/arch/powerpc/kvm/emulate.c
> @@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
> =C2=A0	unsigned long dec_nsec;
> =C2=A0	unsigned long long dec_time;
> =C2=A0
> -	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
> +	pr_debug("mtDEC: %llx\n", vcpu->arch.dec);
> =C2=A0	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
> =C2=A0
> =C2=A0#ifdef CONFIG_PPC_BOOK3S
> @@ -47,7 +47,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
> =C2=A0	kvmppc_core_dequeue_dec(vcpu);
> =C2=A0
> =C2=A0	/* POWER4+ triggers a dec interrupt if the value is < 0 */
> -	if (vcpu->arch.dec & 0x80000000) {
> +	if ((s64) vcpu->arch.dec < 0) {
> =C2=A0		kvmppc_core_queue_dec(vcpu);
> =C2=A0		return;
> =C2=A0	}
> @@ -78,7 +78,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
> =C2=A0	vcpu->arch.dec_jiffies =3D get_tb();
> =C2=A0}
> =C2=A0
> -u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
> +u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
> =C2=A0{
> =C2=A0	u64 jd =3D tb - vcpu->arch.dec_jiffies;
> =C2=A0

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: PPC: hypervisor large decrementer support
  2016-06-01  6:23       ` Michael Neuling
@ 2016-06-02 21:46         ` Benjamin Herrenschmidt
  2016-06-07 13:04           ` Michael Ellerman
  2016-06-22  7:30         ` oliver
  1 sibling, 1 reply; 16+ messages in thread
From: Benjamin Herrenschmidt @ 2016-06-02 21:46 UTC (permalink / raw)
  To: Michael Neuling, Oliver O'Halloran, linuxppc-dev; +Cc: Paul Mackerras

On Wed, 2016-06-01 at 16:23 +1000, Michael Neuling wrote:
> FWIW you can use:
>         andis. reg,reg,(LPCR_LD)@ha

@h in that case. Probably the same result but technically @ha is for
arithmetic operations.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] powerpc/timer - large decrementer support
  2016-06-01  5:28     ` [PATCH 1/2] powerpc/timer - " Michael Neuling
@ 2016-06-03  2:01       ` Balbir Singh
  0 siblings, 0 replies; 16+ messages in thread
From: Balbir Singh @ 2016-06-03  2:01 UTC (permalink / raw)
  To: Michael Neuling, Oliver O'Halloran, linuxppc-dev; +Cc: Jack Miller



On 01/06/16 15:28, Michael Neuling wrote:
> On Tue, 2016-05-31 at 17:16 +1000, Oliver O'Halloran wrote:
>> POWER ISA v3 adds large decrementer (LD) mode of operation which
>> increases
>> the size of the decrementer register from 32 bits to an implementation
>> defined with of up to 64 bits.
>>
>> This patch adds support for the LD on processors with the
>> CPU_FTR_ARCH_300
>> cpu feature flag set. For CPUs with this feature LD mode is enabled when
>> when the ibm,dec-bits devicetree property is supplied for the boot CPU.
>> The
>> decrementer value is a signed quantity (with negative values indicating a
>> pending exception) and this property is required to find the maximum
>> positive decrementer value. If this property is not supplied then the
>> traditional decrementer width of 32 bits is assumed and LD mode is
>> disabled.
>>
>> This patch was based on initial work by Jack Miller.
>>
>> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
>> Cc: Michael Neuling <mikey@neuling.org>
> 
> Acked-by: Michael Neuling <mikey@neuling.org>
> 
>> Cc: Balbir Singh <bsingharora@gmail.com>
>> Cc: Jack Miller <jack@codezen.org>

I had reviewed a previous post of this patchset

Reviewed-by: Balbir Singh <bsingharora@gmail.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: PPC: hypervisor large decrementer support
  2016-06-02 21:46         ` Benjamin Herrenschmidt
@ 2016-06-07 13:04           ` Michael Ellerman
  0 siblings, 0 replies; 16+ messages in thread
From: Michael Ellerman @ 2016-06-07 13:04 UTC (permalink / raw)
  To: benh, Michael Neuling, Oliver O'Halloran, linuxppc-dev; +Cc: Paul Mackerras

On Fri, 2016-06-03 at 07:46 +1000, Benjamin Herrenschmidt wrote:
> On Wed, 2016-06-01 at 16:23 +1000, Michael Neuling wrote:
> > FWIW you can use:
> >         andis. reg,reg,(LPCR_LD)@ha
> 
> @h in that case. Probably the same result but technically @ha is for
> arithmetic operations.

In this case it doesn't matter, because LPCR_LD is a single bit constant.

But if bit 15 of your constant happens to be 1, using @ha will mean 1 is added
to the high 16 bits of the value.

eg. if you have:

	#define CONSTANT        0x00208000

        lis     r4,CONSTANT@ha

You get:

	10000118:	21 00 80 3c 	lis     r4,33
	                                           ^
	                                           = 0x21


Documented (sort of) here:

  http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html#RELOC-TYPE
  
  #ha(value) denotes the high adjusted value: bits 16 through 31 of the indicated value, compensating for #lo() being treated as a signed number:
  
      #ha(x) = (((x >> 16) + ((x & 0x8000) ? 1 : 0)) & 0xffff)

cheers

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: PPC: hypervisor large decrementer support
  2016-06-01  6:23       ` Michael Neuling
  2016-06-02 21:46         ` Benjamin Herrenschmidt
@ 2016-06-22  7:30         ` oliver
  1 sibling, 0 replies; 16+ messages in thread
From: oliver @ 2016-06-22  7:30 UTC (permalink / raw)
  To: Michael Neuling; +Cc: linuxppc-dev, Paul Mackerras

On Wed, Jun 1, 2016 at 4:23 PM, Michael Neuling <mikey@neuling.org> wrote:
> On Tue, 2016-05-31 at 17:16 +1000, Oliver O'Halloran wrote:
>> +#define IS_LD_ENABLED(reg)                 \
>> +     mfspr  reg,SPRN_LPCR;              \
>> +     andis. reg,reg,(LPCR_LD >> 16);
>
> FWIW you can use:
>         andis. reg,reg,(LPCR_LD)@ha
>
>> +#define GET_DEC(reg)                       \
>> +     IS_LD_ENABLED(reg);                \
>> +     mfspr reg, SPRN_DEC;               \
>> +     bne 99f;                           \
>> +     extsw reg, reg;                    \
>> +99:
>
> It's a little painful that GET_DEC() is now 2 SPR moves.  SPR moves can be
> a bit expensive.  Probably ok for now, but might be nice to store the guest
> dec LD state somewhere so we don't have to retrieve it from the LPCR SPR.

Seems reasonable. It looks like it stashes the LPCR value in the KVM vcpu
structure already


> Actually, it's probably best to do this now since checking the LD bit in
> the LPCR on P8 is a bit dodgy and unnecessary. There is a kvm->arch.lpcr
> you might be able use for this and you can add an asm-offsets for it too
> (like KVM_LPID).
>
> Is GET_DEC ever run in HV=0, where the guest couldn't read the LPCR?

It's only used in book3s_hv_rmhandlers.S, which contains the real mode h-call
handlers and none of that should be executed outside the host. Moving that
code into there from the generic exception header file is a good idea though.

> Also, this now trashes cr0... have you checked that's ok in the paths it's
> used?

It looks fine, but I'll document that.

>> +
>> +     LOAD_REG_ADDR(r6, decrementer_max);
>> +     ld      r6,0(r6);
>>       mtspr   SPRN_HDEC, r6
>>       /* and set per-LPAR registers, if doing dynamic micro-threading */
>>       ld      r6, HSTATE_SPLIT_MODE(r13)
>> @@ -897,7 +902,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>>       mftb    r7
>>       subf    r3,r7,r8
>>       mtspr   SPRN_DEC,r3
>> -     stw     r3,VCPU_DEC(r4)
>> +     std     r3,VCPU_DEC(r4)
>>
>>       ld      r5, VCPU_SPRG0(r4)
>>       ld      r6, VCPU_SPRG1(r4)
>> @@ -953,8 +958,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>>       isync
>>
>>       /* Check if HDEC expires soon */
>> -     mfspr   r3, SPRN_HDEC
>> -     cmpwi   r3, 512         /* 1 microsecond */
>> +     GET_HDEC(r3)
>> +     cmpdi   r3, 512         /* 1 microsecond */
>>       blt     hdec_soon
>>
>>       ld      r6, VCPU_CTR(r4)
>> @@ -990,8 +995,9 @@ deliver_guest_interrupt:
>>       beq     5f
>>       li      r0, BOOK3S_INTERRUPT_EXTERNAL
>>       bne     cr1, 12f
>> -     mfspr   r0, SPRN_DEC
>> -     cmpwi   r0, 0
>> +
>> +     GET_DEC(r0)
>> +     cmpdi   r0, 0
>
> We could just use mfspr DEC here since we are just comparing to 0.  It
> should work in any mode.

I'm not sure about that. The result of the comparison is used below:

>>       li      r0, BOOK3S_INTERRUPT_DECREMENTER
>>       bge     5f

It's checking for the DEC overflowing rather than checking if it's zero. If
LD=0 the mfspr result would not be sign extended causing the branch to be
taken even if the DEC overflowed.

Anyway I'm thinking I might drop this patch for now and let Balbir post it
as a part of his KVM series when that's ready.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: PPC: hypervisor large decrementer support
  2016-04-12  4:38 ` [PATCH 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
  2016-04-12  4:53   ` kbuild test robot
@ 2016-04-12  7:35   ` Balbir Singh
  1 sibling, 0 replies; 16+ messages in thread
From: Balbir Singh @ 2016-04-12  7:35 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: Paul Mackerras



On 12/04/16 14:38, Oliver O'Halloran wrote:
> Power ISAv3 extends the width of the decrementer register from 32 bits.
> The enlarged register width is implementation dependent, but reads from
> these registers are automatically sign extended to produce a 64 bit output
> when operating in large mode. The HDEC always operates in large mode
> while the DEC register can be operated in 32bit mode or large mode
> depending on the setting of the LPCR.LD bit.
> 
> Currently the hypervisor assumes that reads from the DEC and HDEC register
> produce a 32 bit result which it sign extends to 64 bits using the extsw
> instruction. This behaviour can result in the guest DEC register value
> being corrupted by the hypervisor when the guest is operating in LD mode since
> the results of the extsw instruction only depends on the value of bit
> 31 in the register to be sign extended.
> 
> This patch adds the GET_DEC() and GET_HDEC() assembly macros for reading
> from the decrementer registers. These macros will return the current
> decrementer value as a 64 bit quantity regardless of the Host CPU or
> guest decrementer operating mode. Additionally this patch corrects several
> uses of decrementer values that assume a 32 bit register width.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> Cc: Paul Mackerras <paulus@samba.org>
> ---
>  arch/powerpc/include/asm/exception-64s.h | 22 ++++++++++++++++++
>  arch/powerpc/include/asm/kvm_host.h      |  2 +-
>  arch/powerpc/include/asm/kvm_ppc.h       |  2 +-
>  arch/powerpc/include/uapi/asm/kvm.h      |  2 +-
>  arch/powerpc/kernel/exceptions-64s.S     |  9 +++++++-
>  arch/powerpc/kvm/book3s_hv_interrupts.S  |  3 +--
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 38 ++++++++++++++++++--------------
>  arch/powerpc/kvm/emulate.c               |  4 ++--
>  8 files changed, 57 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
> index 93ae809fe5ea..d922f76c682d 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -545,4 +545,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
>  #define FINISH_NAP
>  #endif
>  
> +/* these ensure that we always get a 64bit value from the
> + * decrementer register. */
Comment style does not match recommended style

> +
> +#define IS_LD_ENABLED(reg)                 \
> +	mfspr  reg,SPRN_LPCR;              \
> +	andis. reg,reg,(LPCR_LD >> 16);
> +
> +#define GET_DEC(reg)                       \
> +	IS_LD_ENABLED(reg);                \
> +	mfspr reg, SPRN_DEC;               \
> +	bne 99f;                           \
> +	extsw reg, reg;                    \
> +99:
> +
> +/* For CPUs that support it the Hypervisor LD is
> + * always enabled, so this needs to be feature gated */
> +#define GET_HDEC(reg) \
> +	mfspr reg, SPRN_HDEC;           \
> +BEGIN_FTR_SECTION                       \
> +	extsw reg, reg;                 \
> +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> +
>  #endif	/* _ASM_POWERPC_EXCEPTION_H */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index d7b343170453..6330d3fca083 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -516,7 +516,7 @@ struct kvm_vcpu_arch {
>  	ulong mcsrr0;
>  	ulong mcsrr1;
>  	ulong mcsr;
> -	u32 dec;
> +	u64 dec;
>  #ifdef CONFIG_BOOKE
>  	u32 decar;
>  #endif
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 2544edabe7f3..4de0102930e9 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -94,7 +94,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
>  extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
>  extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
>  extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> -extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> +extern u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
>  extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
>  extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
>  extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index c93cf35ce379..2dd92e841127 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -215,7 +215,7 @@ struct kvm_sregs {
>  			__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
>  			__u32 tcr;
>  			__u32 decar;
> -			__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
> +			__u64 dec;	/* KVM_SREGS_E_UPDATE_DEC */
>  
>  			/*
>  			 * Userspace can read TB directly, but the
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 7716cebf4b8e..984ae894e758 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -641,7 +641,14 @@ masked_##_H##interrupt:					\
>  	stb	r11,PACAIRQHAPPENED(r13);		\
>  	cmpwi	r10,PACA_IRQ_DEC;			\
>  	bne	1f;					\
> -	lis	r10,0x7fff;				\
> +	mfspr	r10,SPRN_LPCR;				\
> +	andis.	r10,r10,(LPCR_LD >> 16);		\
> +	beq	3f;					\
> +	LOAD_REG_ADDR(r10, decrementer_max);		\

I presume this works because all of this is w.r.t. kernel toc
and not necessarily in the kvm module

> +	ld r10,0(r10);					\
> +	mtspr	SPRN_DEC,r10;				\
> +	b	2f;					\
> +3:	lis	r10,0x7fff;				\
>  	ori	r10,r10,0xffff;				\
>  	mtspr	SPRN_DEC,r10;				\
>  	b	2f;					\
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
> index 0fdc4a28970b..b408f72385e4 100644
> --- a/arch/powerpc/kvm/book3s_hv_interrupts.S
> +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
> @@ -121,10 +121,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
>  	 * Put whatever is in the decrementer into the
>  	 * hypervisor decrementer.
>  	 */
> -	mfspr	r8,SPRN_DEC
> +	GET_DEC(r8)
>  	mftb	r7
>  	mtspr	SPRN_HDEC,r8
> -	extsw	r8,r8
>  	add	r8,r8,r7
>  	std	r8,HSTATE_DECEXP(r13)
>  
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index e571ad277398..718e5581494e 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -183,6 +183,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
>  kvmppc_primary_no_guest:
>  	/* We handle this much like a ceded vcpu */
>  	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
> +
> +	/* XXX: ISA v3 only says the HDEC is at least as large as the DEC, but
> +	 *      this code assumes we can fit HDEC in DEC. This is probably
> +	 *      not an issue in practice, but... */
>  	mfspr	r3, SPRN_HDEC
>  	mtspr	SPRN_DEC, r3
>  	/*
> @@ -249,9 +253,9 @@ kvm_novcpu_wakeup:
>  	bge	kvm_novcpu_exit
>  
>  	/* See if our timeslice has expired (HDEC is negative) */
> -	mfspr	r0, SPRN_HDEC
> +	GET_HDEC(r0);
>  	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
> -	cmpwi	r0, 0
> +	cmpdi	r0, 0
>  	blt	kvm_novcpu_exit
>  
>  	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
> @@ -340,8 +344,9 @@ kvm_secondary_got_guest:
>  	lbz	r4, HSTATE_PTID(r13)
>  	cmpwi	r4, 0
>  	bne	63f
> -	lis	r6, 0x7fff
> -	ori	r6, r6, 0xffff
> +
> +	LOAD_REG_ADDR(r6, decrementer_max);
> +	ld	r6,0(r6);
>  	mtspr	SPRN_HDEC, r6
>  	/* and set per-LPAR registers, if doing dynamic micro-threading */
>  	ld	r6, HSTATE_SPLIT_MODE(r13)
> @@ -897,7 +902,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>  	mftb	r7
>  	subf	r3,r7,r8
>  	mtspr	SPRN_DEC,r3
> -	stw	r3,VCPU_DEC(r4)
> +	std	r3,VCPU_DEC(r4)
>  
>  	ld	r5, VCPU_SPRG0(r4)
>  	ld	r6, VCPU_SPRG1(r4)
> @@ -953,8 +958,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>  	isync
>  
>  	/* Check if HDEC expires soon */
> -	mfspr	r3, SPRN_HDEC
> -	cmpwi	r3, 512		/* 1 microsecond */
> +	GET_HDEC(r3)
> +	cmpdi	r3, 512		/* 1 microsecond */
>  	blt	hdec_soon
>  
>  	ld	r6, VCPU_CTR(r4)
> @@ -990,8 +995,9 @@ deliver_guest_interrupt:
>  	beq	5f
>  	li	r0, BOOK3S_INTERRUPT_EXTERNAL
>  	bne	cr1, 12f
> -	mfspr	r0, SPRN_DEC
> -	cmpwi	r0, 0
> +
> +	GET_DEC(r0)
> +	cmpdi	r0, 0
>  	li	r0, BOOK3S_INTERRUPT_DECREMENTER
>  	bge	5f
>  
> @@ -1206,8 +1212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>  	/* See if this is a leftover HDEC interrupt */
>  	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
>  	bne	2f
> -	mfspr	r3,SPRN_HDEC
> -	cmpwi	r3,0
> +	GET_HDEC(r3);
> +	cmpdi	r3,0
>  	mr	r4,r9
>  	bge	fast_guest_return
>  2:
> @@ -1326,9 +1332,8 @@ mc_cont:
>  	mtspr	SPRN_SPURR,r4
>  
>  	/* Save DEC */
> -	mfspr	r5,SPRN_DEC
> +	GET_DEC(r5);
>  	mftb	r6
> -	extsw	r5,r5
>  	add	r5,r5,r6
>  	/* r5 is a guest timebase value here, convert to host TB */
>  	ld	r3,HSTATE_KVM_VCORE(r13)
> @@ -2250,15 +2255,14 @@ _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
>  	 * no later than the end of our timeslice (HDEC interrupts
>  	 * don't wake us from nap).
>  	 */
> -	mfspr	r3, SPRN_DEC
> -	mfspr	r4, SPRN_HDEC
> +	GET_DEC(r3);
> +	GET_HDEC(r4);
>  	mftb	r5
> -	cmpw	r3, r4
> +	cmpd	r3, r4
>  	ble	67f
>  	mtspr	SPRN_DEC, r4
>  67:
>  	/* save expiry time of guest decrementer */
> -	extsw	r3, r3
>  	add	r3, r3, r5
>  	ld	r4, HSTATE_KVM_VCPU(r13)
>  	ld	r5, HSTATE_KVM_VCORE(r13)
> diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
> index 5cc2e7af3a7b..4d26252162b0 100644
> --- a/arch/powerpc/kvm/emulate.c
> +++ b/arch/powerpc/kvm/emulate.c
> @@ -47,7 +47,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
>  	kvmppc_core_dequeue_dec(vcpu);
>  
>  	/* POWER4+ triggers a dec interrupt if the value is < 0 */
> -	if (vcpu->arch.dec & 0x80000000) {
> +	if ((s64) vcpu->arch.dec < 0) {
>  		kvmppc_core_queue_dec(vcpu);
>  		return;
>  	}
> @@ -78,7 +78,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
>  	vcpu->arch.dec_jiffies = get_tb();
>  }
>  
> -u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
> +u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
>  {
>  	u64 jd = tb - vcpu->arch.dec_jiffies;
>  
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] KVM: PPC: hypervisor large decrementer support
  2016-04-12  4:38 ` [PATCH 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
@ 2016-04-12  4:53   ` kbuild test robot
  2016-04-12  7:35   ` Balbir Singh
  1 sibling, 0 replies; 16+ messages in thread
From: kbuild test robot @ 2016-04-12  4:53 UTC (permalink / raw)
  To: Oliver O'Halloran
  Cc: kbuild-all, linuxppc-dev, Paul Mackerras, Oliver O'Halloran

[-- Attachment #1: Type: text/plain, Size: 4082 bytes --]

Hi Oliver,

[auto build test WARNING on powerpc/next]
[also build test WARNING on v4.6-rc3 next-20160411]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Oliver-O-Halloran/powerpc-timer-large-decrementer-support/20160412-124302
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allmodconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=powerpc 

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:13:0,
                    from include/linux/jiffies.h:5,
                    from arch/powerpc/kvm/emulate.c:21:
   arch/powerpc/kvm/emulate.c: In function 'kvmppc_emulate_dec':
>> arch/powerpc/kvm/emulate.c:42:11: warning: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'u64 {aka long long unsigned int}' [-Wformat=]
     pr_debug("mtDEC: %x\n", vcpu->arch.dec);
              ^
   include/linux/printk.h:236:21: note: in definition of macro 'pr_fmt'
    #define pr_fmt(fmt) fmt
                        ^
   include/linux/printk.h:283:2: note: in expansion of macro 'dynamic_pr_debug'
     dynamic_pr_debug(fmt, ##__VA_ARGS__)
     ^
>> arch/powerpc/kvm/emulate.c:42:2: note: in expansion of macro 'pr_debug'
     pr_debug("mtDEC: %x\n", vcpu->arch.dec);
     ^

vim +42 arch/powerpc/kvm/emulate.c

bbf45ba57 Hollis Blanchard 2008-04-16  15   * Copyright IBM Corp. 2007
dfd4d47e9 Scott Wood       2011-11-17  16   * Copyright 2011 Freescale Semiconductor, Inc.
bbf45ba57 Hollis Blanchard 2008-04-16  17   *
bbf45ba57 Hollis Blanchard 2008-04-16  18   * Authors: Hollis Blanchard <hollisb@us.ibm.com>
bbf45ba57 Hollis Blanchard 2008-04-16  19   */
bbf45ba57 Hollis Blanchard 2008-04-16  20  
bbf45ba57 Hollis Blanchard 2008-04-16 @21  #include <linux/jiffies.h>
544c6761b Alexander Graf   2009-11-02  22  #include <linux/hrtimer.h>
bbf45ba57 Hollis Blanchard 2008-04-16  23  #include <linux/types.h>
bbf45ba57 Hollis Blanchard 2008-04-16  24  #include <linux/string.h>
bbf45ba57 Hollis Blanchard 2008-04-16  25  #include <linux/kvm_host.h>
6e35994d1 Bharat Bhushan   2012-04-18  26  #include <linux/clockchips.h>
bbf45ba57 Hollis Blanchard 2008-04-16  27  
75f74f0db Hollis Blanchard 2008-11-05  28  #include <asm/reg.h>
bbf45ba57 Hollis Blanchard 2008-04-16  29  #include <asm/time.h>
bbf45ba57 Hollis Blanchard 2008-04-16  30  #include <asm/byteorder.h>
bbf45ba57 Hollis Blanchard 2008-04-16  31  #include <asm/kvm_ppc.h>
c381a0431 Hollis Blanchard 2008-11-05  32  #include <asm/disassemble.h>
9123c5ed4 Hongtao Jia      2013-04-28  33  #include <asm/ppc-opcode.h>
73e75b416 Hollis Blanchard 2008-12-02  34  #include "timing.h"
46f43c6ee Marcelo Tosatti  2009-06-18  35  #include "trace.h"
bbf45ba57 Hollis Blanchard 2008-04-16  36  
75f74f0db Hollis Blanchard 2008-11-05  37  void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
bbf45ba57 Hollis Blanchard 2008-04-16  38  {
544c6761b Alexander Graf   2009-11-02  39  	unsigned long dec_nsec;
dc2babfea Bharat Bhushan   2011-10-19  40  	unsigned long long dec_time;
9a7a9b09f Alexander Graf   2009-10-30  41  
544c6761b Alexander Graf   2009-11-02 @42  	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
dfd4d47e9 Scott Wood       2011-11-17  43  	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
dfd4d47e9 Scott Wood       2011-11-17  44  
00c3a37ca Alexander Graf   2010-04-16  45  #ifdef CONFIG_PPC_BOOK3S

:::::: The code at line 42 was first introduced by commit
:::::: 544c6761bb05a1dd19a39cb9bed096273f9bdb36 Use hrtimers for the decrementer

:::::: TO: Alexander Graf <agraf@suse.de>
:::::: CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 48567 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 2/2] KVM: PPC: hypervisor large decrementer support
  2016-04-12  4:38 Oliver O'Halloran
@ 2016-04-12  4:38 ` Oliver O'Halloran
  2016-04-12  4:53   ` kbuild test robot
  2016-04-12  7:35   ` Balbir Singh
  0 siblings, 2 replies; 16+ messages in thread
From: Oliver O'Halloran @ 2016-04-12  4:38 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Oliver O'Halloran, Paul Mackerras

Power ISAv3 extends the width of the decrementer register from 32 bits.
The enlarged register width is implementation dependent, but reads from
these registers are automatically sign extended to produce a 64 bit output
when operating in large mode. The HDEC always operates in large mode
while the DEC register can be operated in 32bit mode or large mode
depending on the setting of the LPCR.LD bit.

Currently the hypervisor assumes that reads from the DEC and HDEC register
produce a 32 bit result which it sign extends to 64 bits using the extsw
instruction. This behaviour can result in the guest DEC register value
being corrupted by the hypervisor when the guest is operating in LD mode since
the results of the extsw instruction only depends on the value of bit
31 in the register to be sign extended.

This patch adds the GET_DEC() and GET_HDEC() assembly macros for reading
from the decrementer registers. These macros will return the current
decrementer value as a 64 bit quantity regardless of the Host CPU or
guest decrementer operating mode. Additionally this patch corrects several
uses of decrementer values that assume a 32 bit register width.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/exception-64s.h | 22 ++++++++++++++++++
 arch/powerpc/include/asm/kvm_host.h      |  2 +-
 arch/powerpc/include/asm/kvm_ppc.h       |  2 +-
 arch/powerpc/include/uapi/asm/kvm.h      |  2 +-
 arch/powerpc/kernel/exceptions-64s.S     |  9 +++++++-
 arch/powerpc/kvm/book3s_hv_interrupts.S  |  3 +--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 38 ++++++++++++++++++--------------
 arch/powerpc/kvm/emulate.c               |  4 ++--
 8 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 93ae809fe5ea..d922f76c682d 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -545,4 +545,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 #define FINISH_NAP
 #endif
 
+/* these ensure that we always get a 64bit value from the
+ * decrementer register. */
+
+#define IS_LD_ENABLED(reg)                 \
+	mfspr  reg,SPRN_LPCR;              \
+	andis. reg,reg,(LPCR_LD >> 16);
+
+#define GET_DEC(reg)                       \
+	IS_LD_ENABLED(reg);                \
+	mfspr reg, SPRN_DEC;               \
+	bne 99f;                           \
+	extsw reg, reg;                    \
+99:
+
+/* For CPUs that support it the Hypervisor LD is
+ * always enabled, so this needs to be feature gated */
+#define GET_HDEC(reg) \
+	mfspr reg, SPRN_HDEC;           \
+BEGIN_FTR_SECTION                       \
+	extsw reg, reg;                 \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
 #endif	/* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d7b343170453..6330d3fca083 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -516,7 +516,7 @@ struct kvm_vcpu_arch {
 	ulong mcsrr0;
 	ulong mcsrr1;
 	ulong mcsr;
-	u32 dec;
+	u64 dec;
 #ifdef CONFIG_BOOKE
 	u32 decar;
 #endif
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2544edabe7f3..4de0102930e9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -94,7 +94,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
 extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
-extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
+extern u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
 extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c93cf35ce379..2dd92e841127 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -215,7 +215,7 @@ struct kvm_sregs {
 			__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
 			__u32 tcr;
 			__u32 decar;
-			__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
+			__u64 dec;	/* KVM_SREGS_E_UPDATE_DEC */
 
 			/*
 			 * Userspace can read TB directly, but the
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 7716cebf4b8e..984ae894e758 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -641,7 +641,14 @@ masked_##_H##interrupt:					\
 	stb	r11,PACAIRQHAPPENED(r13);		\
 	cmpwi	r10,PACA_IRQ_DEC;			\
 	bne	1f;					\
-	lis	r10,0x7fff;				\
+	mfspr	r10,SPRN_LPCR;				\
+	andis.	r10,r10,(LPCR_LD >> 16);		\
+	beq	3f;					\
+	LOAD_REG_ADDR(r10, decrementer_max);		\
+	ld r10,0(r10);					\
+	mtspr	SPRN_DEC,r10;				\
+	b	2f;					\
+3:	lis	r10,0x7fff;				\
 	ori	r10,r10,0xffff;				\
 	mtspr	SPRN_DEC,r10;				\
 	b	2f;					\
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..b408f72385e4 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
 	 */
-	mfspr	r8,SPRN_DEC
+	GET_DEC(r8)
 	mftb	r7
 	mtspr	SPRN_HDEC,r8
-	extsw	r8,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index e571ad277398..718e5581494e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -183,6 +183,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+
+	/* XXX: ISA v3 only says the HDEC is at least as large as the DEC, but
+	 *      this code assumes we can fit HDEC in DEC. This is probably
+	 *      not an issue in practice, but... */
 	mfspr	r3, SPRN_HDEC
 	mtspr	SPRN_DEC, r3
 	/*
@@ -249,9 +253,9 @@ kvm_novcpu_wakeup:
 	bge	kvm_novcpu_exit
 
 	/* See if our timeslice has expired (HDEC is negative) */
-	mfspr	r0, SPRN_HDEC
+	GET_HDEC(r0);
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	cmpwi	r0, 0
+	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
 
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -340,8 +344,9 @@ kvm_secondary_got_guest:
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	lis	r6, 0x7fff
-	ori	r6, r6, 0xffff
+
+	LOAD_REG_ADDR(r6, decrementer_max);
+	ld	r6,0(r6);
 	mtspr	SPRN_HDEC, r6
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
@@ -897,7 +902,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mftb	r7
 	subf	r3,r7,r8
 	mtspr	SPRN_DEC,r3
-	stw	r3,VCPU_DEC(r4)
+	std	r3,VCPU_DEC(r4)
 
 	ld	r5, VCPU_SPRG0(r4)
 	ld	r6, VCPU_SPRG1(r4)
@@ -953,8 +958,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	isync
 
 	/* Check if HDEC expires soon */
-	mfspr	r3, SPRN_HDEC
-	cmpwi	r3, 512		/* 1 microsecond */
+	GET_HDEC(r3)
+	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
 	ld	r6, VCPU_CTR(r4)
@@ -990,8 +995,9 @@ deliver_guest_interrupt:
 	beq	5f
 	li	r0, BOOK3S_INTERRUPT_EXTERNAL
 	bne	cr1, 12f
-	mfspr	r0, SPRN_DEC
-	cmpwi	r0, 0
+
+	GET_DEC(r0)
+	cmpdi	r0, 0
 	li	r0, BOOK3S_INTERRUPT_DECREMENTER
 	bge	5f
 
@@ -1206,8 +1212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
-	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,0
+	GET_HDEC(r3);
+	cmpdi	r3,0
 	mr	r4,r9
 	bge	fast_guest_return
 2:
@@ -1326,9 +1332,8 @@ mc_cont:
 	mtspr	SPRN_SPURR,r4
 
 	/* Save DEC */
-	mfspr	r5,SPRN_DEC
+	GET_DEC(r5);
 	mftb	r6
-	extsw	r5,r5
 	add	r5,r5,r6
 	/* r5 is a guest timebase value here, convert to host TB */
 	ld	r3,HSTATE_KVM_VCORE(r13)
@@ -2250,15 +2255,14 @@ _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
 	 * no later than the end of our timeslice (HDEC interrupts
 	 * don't wake us from nap).
 	 */
-	mfspr	r3, SPRN_DEC
-	mfspr	r4, SPRN_HDEC
+	GET_DEC(r3);
+	GET_HDEC(r4);
 	mftb	r5
-	cmpw	r3, r4
+	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
 67:
 	/* save expiry time of guest decrementer */
-	extsw	r3, r3
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 5cc2e7af3a7b..4d26252162b0 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -47,7 +47,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	kvmppc_core_dequeue_dec(vcpu);
 
 	/* POWER4+ triggers a dec interrupt if the value is < 0 */
-	if (vcpu->arch.dec & 0x80000000) {
+	if ((s64) vcpu->arch.dec < 0) {
 		kvmppc_core_queue_dec(vcpu);
 		return;
 	}
@@ -78,7 +78,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	vcpu->arch.dec_jiffies = get_tb();
 }
 
-u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
 {
 	u64 jd = tb - vcpu->arch.dec_jiffies;
 
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-06-22  7:30 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-10  4:57 [PATCH v3 1/2] powerpc/timer - large decrementer support Oliver O'Halloran
2016-05-10  4:57 ` [PATCH v3 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
2016-05-11  5:05 ` [PATCH v3 1/2] powerpc/timer - " Balbir Singh
2016-05-31  6:25 ` Michael Neuling
2016-05-31  7:05   ` oliver
2016-05-31  7:16   ` [PATCH " Oliver O'Halloran
2016-05-31  7:16     ` [PATCH 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
2016-06-01  6:23       ` Michael Neuling
2016-06-02 21:46         ` Benjamin Herrenschmidt
2016-06-07 13:04           ` Michael Ellerman
2016-06-22  7:30         ` oliver
2016-06-01  5:28     ` [PATCH 1/2] powerpc/timer - " Michael Neuling
2016-06-03  2:01       ` Balbir Singh
  -- strict thread matches above, loose matches on Subject: below --
2016-04-12  4:38 Oliver O'Halloran
2016-04-12  4:38 ` [PATCH 2/2] KVM: PPC: hypervisor " Oliver O'Halloran
2016-04-12  4:53   ` kbuild test robot
2016-04-12  7:35   ` Balbir Singh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.