From: Steven Price <steven.price@arm.com>
To: kvmarm@lists.cs.columbia.edu, linux-arm-kernel@lists.infradead.org
Cc: Mark Rutland <mark.rutland@arm.com>,
	Marc Zyngier <marc.zyngier@arm.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will.deacon@arm.com>,
	Christoffer Dall <christoffer.dall@arm.com>,
	Steven Price <steven.price@arm.com>
Subject: [RFC PATCH v2 11/12] clocksource: arm_arch_timer: Use paravirtualized LPT
Date: Wed, 12 Dec 2018 15:02:25 +0000
Message-ID: <20181212150226.38051-12-steven.price@arm.com>
In-Reply-To: <20181212150226.38051-1-steven.price@arm.com>

Enable paravirtualized time to be used in a KVM guest if the host
supports it. This allows the guest to derive a counter which is clocked
at a consistent rate even when the guest is migrated.

If we discover that the system supports SMCCC v1.1, we probe to
determine whether the hypervisor supports paravirtualized features and,
finally, whether it supports "Live Physical Time" reporting. If so, a
shared structure is made available to the guest containing the
coefficients needed to calculate the derived clock.
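
For reference, the fields of that shared structure which this patch
reads look roughly as follows. This is a sketch only: the names and
types are taken from the accessors in the diff below, but the field
ordering and any additional fields are assumptions; the authoritative
layout is the pvclock_vm_time_info definition in <asm/pvclock-abi.h>
introduced earlier in this series.

  struct pvclock_vm_time_info {
          __le64 sequence_number;     /* bumped by the hypervisor around updates */
          __le64 native_freq;         /* frequency of the hardware counter */
          __le64 pv_freq;             /* frequency of the derived (LPT) counter */
          __le64 scale_mult;          /* native->PV multiplier, scaled by 2^64 */
          __le32 shift;               /* left shift applied before scale_mult */
          __le64 div_by_pv_freq_mult; /* 1/pv_freq, scaled by 2^64 */
  };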

The guest kernel uses the coefficients to present a clock to user space
that always runs at the same rate whenever the guest is running
('live'), even if the frequency of the underlying physical counter
changes (because the guest has been migrated).
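
Spelled out, the arithmetic performed by the native_to_pv_cycles() and
pv_to_native_cycles() helpers below is roughly (assuming, as the code
suggests, that scale_mult and div_by_pv_freq_mult are fractions scaled
by 2^64):

  pv_cycles     = ((native_cycles << shift) * scale_mult) >> 64
  native_cycles = ((native_freq * pv_cycles + pv_freq - 1)
                   * div_by_pv_freq_mult) >> 64
                ~= DIV_ROUND_UP(native_freq * pv_cycles, pv_freq)

so the counter presented to the guest always ticks at pv_freq,
whatever the frequency of the underlying hardware counter.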

The existing workaround framework for CNTVCT is used to disable the VDSO
and trap user space accesses to the timer registers so we can present the
derived clock.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/arm64/include/asm/arch_timer.h  |  32 ++++-
 arch/arm64/kernel/cpuinfo.c          |   2 +-
 drivers/clocksource/arm_arch_timer.c | 177 ++++++++++++++++++++++++++-
 3 files changed, 205 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index f2a234d6516c..ec0e7250c453 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -20,12 +20,14 @@
 #define __ASM_ARCH_TIMER_H
 
 #include <asm/barrier.h>
+#include <asm/pvclock-abi.h>
 #include <asm/sysreg.h>
 
 #include <linux/bug.h>
 #include <linux/init.h>
 #include <linux/jump_label.h>
 #include <linux/smp.h>
+#include <linux/static_key.h>
 #include <linux/types.h>
 
 #include <clocksource/arm_arch_timer.h>
@@ -79,6 +81,19 @@ DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *,
 	_val;								\
 })
 
+void pvclock_reg_write_cntv_tval_el0(u32 val);
+extern struct static_key_false arch_counter_cntfrq_ool_enabled;
+extern u64 pvclock_get_cntfrq(void);
+extern struct static_key_false arch_counter_cntvct_ool_enabled;
+extern u64 pvclock_get_cntvct(void);
+
+static __always_inline void __write_cntv_tval_el0(u32 val)
+{
+	if (static_branch_unlikely(&arch_counter_cntvct_ool_enabled))
+		return pvclock_reg_write_cntv_tval_el0(val);
+	write_sysreg(val, cntv_tval_el0);
+}
+
 /*
  * These register accessors are marked inline so the compiler can
  * nicely work out which register we want, and chuck away the rest of
@@ -102,7 +117,7 @@ void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
 			write_sysreg(val, cntv_ctl_el0);
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			write_sysreg(val, cntv_tval_el0);
+			__write_cntv_tval_el0(val);
 			break;
 		}
 	}
@@ -134,7 +149,10 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
 
 static inline u32 arch_timer_get_cntfrq(void)
 {
-	return read_sysreg(cntfrq_el0);
+	if (static_branch_unlikely(&arch_counter_cntfrq_ool_enabled))
+		return pvclock_get_cntfrq();
+	else
+		return read_sysreg(cntfrq_el0);
 }
 
 static inline u32 arch_timer_get_cntkctl(void)
@@ -154,12 +172,20 @@ static inline u64 arch_counter_get_cntpct(void)
 	return arch_timer_reg_read_stable(cntpct_el0);
 }
 
-static inline u64 arch_counter_get_cntvct(void)
+static inline u64 __arch_counter_get_cntvct(void)
 {
 	isb();
 	return arch_timer_reg_read_stable(cntvct_el0);
 }
 
+static inline u64 arch_counter_get_cntvct(void)
+{
+	if (static_branch_unlikely(&arch_counter_cntvct_ool_enabled))
+		return pvclock_get_cntvct();
+	else
+		return __arch_counter_get_cntvct();
+}
+
 static inline int arch_timer_arch_init(void)
 {
 	return 0;
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index bcc2831399cb..74410727829d 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -324,7 +324,7 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 
 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 {
-	info->reg_cntfrq = arch_timer_get_cntfrq();
+	info->reg_cntfrq = read_cpuid(CNTFRQ_EL0);
 	/*
 	 * Use the effective value of the CTR_EL0 than the raw value
 	 * exposed by the CPU. CTR_E0.IDC field value must be interpreted
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 9a7d4dc00b6e..6e84e1acc4f4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -11,6 +11,7 @@
 
 #define pr_fmt(fmt)	"arm_arch_timer: " fmt
 
+#include <linux/arm-smccc.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
@@ -23,6 +24,8 @@
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
 #include <linux/io.h>
+#include <linux/psci.h>
+#include <linux/reboot.h>
 #include <linux/slab.h>
 #include <linux/sched/clock.h>
 #include <linux/sched_clock.h>
@@ -86,6 +89,171 @@ static int __init early_evtstrm_cfg(char *buf)
 }
 early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg);
 
+#ifdef CONFIG_ARM64
+/* Paravirtualised time is only supported for 64 bit */
+static struct pvclock_vm_time_info *pvclock_vm_time_info;
+
+DEFINE_STATIC_KEY_FALSE(arch_counter_cntvct_ool_enabled);
+EXPORT_SYMBOL_GPL(arch_counter_cntvct_ool_enabled);
+DEFINE_STATIC_KEY_FALSE(arch_counter_cntfrq_ool_enabled);
+EXPORT_SYMBOL_GPL(arch_counter_cntfrq_ool_enabled);
+
+static inline u64 native_to_pv_cycles(const struct pvclock_vm_time_info *info,
+		u64 cnt)
+{
+	u32 shift = le32_to_cpu(info->shift);
+	u64 scale_mult = le64_to_cpu(info->scale_mult);
+
+	cnt <<= shift;
+	return mul_u64_u64_shr(scale_mult, cnt, 64);
+}
+
+static inline u64 pv_to_native_cycles(const struct pvclock_vm_time_info *info,
+		u64 cnt)
+{
+	u64 native_freq = le64_to_cpu(info->native_freq);
+	u64 pv_freq = le64_to_cpu(info->pv_freq);
+	u64 div_by_pv_freq_mult = le64_to_cpu(info->div_by_pv_freq_mult);
+
+	cnt = native_freq * cnt + pv_freq - 1;
+	return mul_u64_u64_shr(div_by_pv_freq_mult, cnt, 64);
+}
+
+u64 pvclock_get_cntvct(void)
+{
+	u64 cval;
+	__le64 seq_begin, seq_end;
+
+	do {
+		seq_begin = READ_ONCE(pvclock_vm_time_info->sequence_number);
+
+		barrier();
+
+		cval = __arch_counter_get_cntvct();
+		cval = native_to_pv_cycles(pvclock_vm_time_info, cval);
+
+		barrier();
+		seq_end = READ_ONCE(pvclock_vm_time_info->sequence_number);
+	} while (unlikely(seq_begin != seq_end));
+
+	return cval;
+}
+
+u64 pvclock_get_cntfrq(void)
+{
+	return le64_to_cpu(pvclock_vm_time_info->pv_freq);
+}
+
+static void arch_timer_pvclock_init(void)
+{
+	struct arm_smccc_res res;
+	void *kaddr;
+
+	if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+		return;
+
+	arm_smccc_1_1_call(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			   ARM_SMCCC_HV_PV_FEATURES, &res);
+
+	if (res.a0 != SMCCC_RET_SUCCESS)
+		return;
+
+	arm_smccc_1_1_call(ARM_SMCCC_HV_PV_FEATURES,
+			   ARM_SMCCC_HV_PV_TIME_LPT, &res);
+
+	if ((s32)res.a0 < 0)
+		return;
+
+	arm_smccc_1_1_call(ARM_SMCCC_HV_PV_TIME_LPT, 0, &res);
+
+	if ((s64)res.a0 < 0)
+		return;
+
+	kaddr = memremap(res.a0,
+			sizeof(struct pvclock_vm_time_info),
+			MEMREMAP_WB);
+
+	if (!kaddr) {
+		pr_warn("Failed to map LPT structure for paravirtualized clock\n");
+		return;
+	}
+
+	pvclock_vm_time_info = kaddr;
+
+	static_branch_enable(&arch_counter_cntvct_ool_enabled);
+	static_branch_enable(&arch_counter_cntfrq_ool_enabled);
+
+	pr_info("Using paravirtualized clock\n");
+}
+
+static inline bool pvclock_trap_cntvct(void)
+{
+	return static_branch_unlikely(&arch_counter_cntvct_ool_enabled);
+}
+
+static inline void arch_timer_reg_write_cntv_tval(u32 val,
+						  struct arch_timer *timer)
+{
+	__le64 seq_begin, seq_end;
+
+	if (!static_branch_unlikely(&arch_counter_cntvct_ool_enabled)) {
+		writel_relaxed(val, timer->base + CNTV_TVAL);
+		return;
+	}
+
+	do {
+		u32 n_val;
+
+		seq_begin = READ_ONCE(pvclock_vm_time_info->sequence_number);
+
+		barrier();
+
+		n_val = pv_to_native_cycles(pvclock_vm_time_info, val);
+
+		writel_relaxed(n_val, timer->base + CNTV_TVAL);
+		barrier();
+
+		seq_end = READ_ONCE(pvclock_vm_time_info->sequence_number);
+	} while (unlikely(seq_begin != seq_end));
+}
+
+void pvclock_reg_write_cntv_tval_el0(u32 val)
+{
+	__le64 seq_begin, seq_end;
+
+	do {
+		u32 n_val;
+
+		seq_begin = READ_ONCE(pvclock_vm_time_info->sequence_number);
+
+		barrier();
+
+		n_val = pv_to_native_cycles(pvclock_vm_time_info, val);
+
+		write_sysreg(n_val, cntv_tval_el0);
+		barrier();
+
+		seq_end = READ_ONCE(pvclock_vm_time_info->sequence_number);
+	} while (unlikely(seq_begin != seq_end));
+}
+
+#else /* CONFIG_ARM64 */
+static void arch_timer_pvclock_init(void)
+{
+}
+
+static inline bool pvclock_trap_cntvct(void)
+{
+	return false;
+}
+
+static inline void arch_timer_reg_write_cntv_tval(u32 val,
+						 struct arch_timer *timer)
+{
+	writel_relaxed(val, timer->base + CNTV_TVAL);
+}
+#endif /* CONFIG_ARM64 */
+
 /*
  * Architected system timer support.
  */
@@ -111,7 +279,7 @@ void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
 			writel_relaxed(val, timer->base + CNTV_CTL);
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			writel_relaxed(val, timer->base + CNTV_TVAL);
+			arch_timer_reg_write_cntv_tval(val, timer);
 			break;
 		}
 	} else {
@@ -589,6 +757,7 @@ static bool arch_timer_this_cpu_has_cntvct_wa(void)
 #define erratum_set_next_event_tval_phys(...)		({BUG(); 0;})
 #define erratum_handler(fn, r, ...)			({false;})
 #define arch_timer_this_cpu_has_cntvct_wa()		({false;})
+
 #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */
 
 static __always_inline irqreturn_t timer_handler(const int access,
@@ -815,7 +984,7 @@ static void arch_counter_set_user_access(void)
 	 * need to be workaround. The vdso may have been already
 	 * disabled though.
 	 */
-	if (arch_timer_this_cpu_has_cntvct_wa())
+	if (pvclock_trap_cntvct() || arch_timer_this_cpu_has_cntvct_wa())
 		pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id());
 	else
 		cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
@@ -1222,6 +1391,8 @@ static int __init arch_timer_of_init(struct device_node *np)
 
 	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
 
+	arch_timer_pvclock_init();
+
 	rate = arch_timer_get_cntfrq();
 	arch_timer_of_configure_rate(rate, np);
 
@@ -1552,6 +1723,8 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table)
 
 	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
 
+	arch_timer_pvclock_init();
+
 	/*
 	 * When probing via ACPI, we have no mechanism to override the sysreg
 	 * CNTFRQ value. This *must* be correct.
-- 
2.19.2
