kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hikaru Nishida <hikalium@chromium.org>
To: linux-kernel@vger.kernel.org, dme@dme.org, tglx@linutronix.de,
	mlevitsk@redhat.com
Cc: suleiman@google.com, Hikaru Nishida <hikalium@chromium.org>,
	Borislav Petkov <bp@alien8.de>, "H. Peter Anvin" <hpa@zytor.com>,
	Ingo Molnar <mingo@redhat.com>, Jim Mattson <jmattson@google.com>,
	Joerg Roedel <joro@8bytes.org>,
	John Stultz <john.stultz@linaro.org>,
	Juergen Gross <jgross@suse.com>,
	Lai Jiangshan <laijs@linux.alibaba.com>,
	Mike Travis <mike.travis@hpe.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>,
	Sean Christopherson <seanjc@google.com>,
	Stephen Boyd <sboyd@kernel.org>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	kvm@vger.kernel.org, x86@kernel.org
Subject: [v2 PATCH 4/4] x86/kvm: Add guest side support for virtual suspend time injection
Date: Fri,  6 Aug 2021 19:07:10 +0900	[thread overview]
Message-ID: <20210806190607.v2.4.I2cbcd43256eacc3c92274adff6d0458b6a9c15ee@changeid> (raw)
In-Reply-To: <20210806100710.2425336-1-hikalium@chromium.org>

This patch implements virtual suspend time injection support for kvm
guests. If this functionality is enabled and the host supports
KVM_FEATURE_HOST_SUSPEND_TIME,
the guest will register struct kvm_host_suspend_time through MSR to
get how much time the guest spend during the host's suspension.
Host will notify the update on the structure (which happens if the host
went into suspension while the guest was running) through the irq and
the irq will trigger the adjustment of CLOCK_BOOTTIME inside a guest.

Before this patch, there was no way to adjust the CLOCK_BOOTTIME without
actually suspending the kernel. However, some guest applications rely on
the fact that there will be some difference between CLOCK_BOOTTIME and
CLOCK_MONOTONIC after the suspention of the execution and they will be
broken if we just pausing the guest instead of actually suspending them.
Pausing the guest kernels is one solution to solve the problem, but
if we could adjust the clocks without actually suspending them, we can
reduce the overhead of guest's suspend/resume cycles on every host's
suspensions. So this change will be useful for the devices which
experience suspend/resume frequently.

Signed-off-by: Hikaru Nishida <hikalium@chromium.org>
---

 arch/x86/Kconfig                    | 13 ++++++++++
 arch/x86/include/asm/idtentry.h     |  4 +++
 arch/x86/include/asm/kvm_para.h     |  9 +++++++
 arch/x86/kernel/kvmclock.c          | 40 +++++++++++++++++++++++++++++
 include/linux/timekeeper_internal.h |  4 +++
 kernel/time/timekeeping.c           | 33 ++++++++++++++++++++++++
 6 files changed, 103 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 45463c65ea0a..760fe7f04170 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -822,6 +822,19 @@ config KVM_GUEST
 	  underlying device model, the host provides the guest with
 	  timing infrastructure such as time of day, and system time
 
+config KVM_VIRT_SUSPEND_TIMING_GUEST
+	bool "Virtual suspend time injection (guest side)"
+	depends on KVM_GUEST
+	default n
+	help
+	 This option makes the host's suspension reflected on the guest's clocks.
+	 In other words, guest's CLOCK_MONOTONIC will stop and
+	 CLOCK_BOOTTIME keeps running during the host's suspension.
+	 This feature will only be effective when both guest and host enable
+	 this option.
+
+	 If unsure, say N.
+
 config ARCH_CPUIDLE_HALTPOLL
 	def_bool n
 	prompt "Disable host haltpoll when loading haltpoll driver"
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 1345088e9902..38f37c2a6063 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -671,6 +671,10 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR,	sysvec_kvm_posted_intr_wakeup
 DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR,	sysvec_kvm_posted_intr_nested_ipi);
 #endif
 
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+DECLARE_IDTENTRY_SYSVEC(VIRT_SUSPEND_TIMING_VECTOR, sysvec_virtual_suspend_time);
+#endif
+
 #if IS_ENABLED(CONFIG_HYPERV)
 DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR,	sysvec_hyperv_callback);
 DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR,	sysvec_hyperv_reenlightenment);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 69299878b200..094023687c8b 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -16,6 +16,15 @@ static inline bool kvm_check_and_clear_guest_paused(void)
 }
 #endif /* CONFIG_KVM_GUEST */
 
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+u64 kvm_get_suspend_time(void);
+#else
+static inline u64 kvm_get_suspend_time(void)
+{
+	return 0;
+}
+#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
+
 #define KVM_HYPERCALL \
         ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL)
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ad273e5861c1..1c92b54b1bce 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -16,11 +16,15 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/set_memory.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
 
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
 #include <asm/x86_init.h>
 #include <asm/kvmclock.h>
+#include <asm/desc.h>
+#include <asm/idtentry.h>
 
 static int kvmclock __initdata = 1;
 static int kvmclock_vsyscall __initdata = 1;
@@ -48,6 +52,9 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
 
 static struct pvclock_vsyscall_time_info
 			hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+static struct kvm_suspend_time suspend_time __bss_decrypted;
+#endif
 static struct pvclock_wall_clock wall_clock __bss_decrypted;
 static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
 static struct pvclock_vsyscall_time_info *hvclock_mem;
@@ -163,6 +170,18 @@ static int kvm_cs_enable(struct clocksource *cs)
 	return 0;
 }
 
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+/*
+ * kvm_get_suspend_time - This function returns total time passed during
+ * the host was in a suspend state while this guest was running.
+ * (Not a duration of the last host suspension but cumulative time.)
+ */
+u64 kvm_get_suspend_time(void)
+{
+	return suspend_time.suspend_time_ns;
+}
+#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
+
 struct clocksource kvm_clock = {
 	.name	= "kvm-clock",
 	.read	= kvm_clock_get_cycles,
@@ -290,6 +309,18 @@ static int kvmclock_setup_percpu(unsigned int cpu)
 	return p ? 0 : -ENOMEM;
 }
 
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+void timekeeping_inject_virtual_suspend_time(void);
+DEFINE_IDTENTRY_SYSVEC(sysvec_virtual_suspend_time)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	timekeeping_inject_virtual_suspend_time();
+
+	set_irq_regs(old_regs);
+}
+#endif
+
 void __init kvmclock_init(void)
 {
 	u8 flags;
@@ -304,6 +335,15 @@ void __init kvmclock_init(void)
 		return;
 	}
 
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+	if (kvm_para_has_feature(KVM_FEATURE_HOST_SUSPEND_TIME)) {
+		alloc_intr_gate(VIRT_SUSPEND_TIMING_VECTOR, asm_sysvec_virtual_suspend_time);
+		/* Register the suspend time structure */
+		wrmsrl(MSR_KVM_HOST_SUSPEND_TIME,
+		       slow_virt_to_phys(&suspend_time) | KVM_MSR_ENABLED);
+	}
+#endif
+
 	if (cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "kvmclock:setup_percpu",
 			      kvmclock_setup_percpu, NULL) < 0) {
 		return;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 84ff2844df2a..a5fd515f0a9d 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -124,6 +124,10 @@ struct timekeeper {
 	u32			ntp_err_mult;
 	/* Flag used to avoid updating NTP twice with same second */
 	u32			skip_second_overflow;
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+	/* suspend_time_injected keeps the duration injected through kvm */
+	u64			suspend_time_injected;
+#endif
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 	long			last_warning;
 	/*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3ac3fb479981..424c61d38646 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2125,6 +2125,39 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 	return offset;
 }
 
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+/*
+ * timekeeping_inject_virtual_suspend_time - Inject virtual suspend time
+ * when requested by the kvm host.
+ * This function should be called under irq context.
+ */
+void timekeeping_inject_virtual_suspend_time(void)
+{
+	/*
+	 * Only updates shadow_timekeeper so the change will be reflected
+	 * on the next call of timekeeping_advance().
+	 */
+	struct timekeeper *tk = &shadow_timekeeper;
+	unsigned long flags;
+	struct timespec64 delta;
+	u64 suspend_time;
+
+	raw_spin_lock_irqsave(&timekeeper_lock, flags);
+	suspend_time = kvm_get_suspend_time();
+	if (suspend_time > tk->suspend_time_injected) {
+		/*
+		 * Do injection only if the time is not injected yet.
+		 * suspend_time and tk->suspend_time_injected values are
+		 * cummrative, so take a diff and inject the duration.
+		 */
+		delta = ns_to_timespec64(suspend_time - tk->suspend_time_injected);
+		__timekeeping_inject_sleeptime(tk, &delta);
+		tk->suspend_time_injected = suspend_time;
+	}
+	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+}
+#endif
+
 /*
  * timekeeping_advance - Updates the timekeeper to the current time and
  * current NTP tick length
-- 
2.32.0.605.g8dce9f2422-goog


  parent reply	other threads:[~2021-08-06 10:08 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-06 10:07 [v2 PATCH 0/4] x86/kvm: Virtual suspend time injection support Hikaru Nishida
2021-08-06 10:07 ` [v2 PATCH 1/4] x86/kvm: Reserve KVM_FEATURE_HOST_SUSPEND_TIME and MSR_KVM_HOST_SUSPEND_TIME Hikaru Nishida
2021-08-06 10:07 ` [v2 PATCH 2/4] x86/kvm: Add definitions for virtual suspend time injection Hikaru Nishida
2021-08-10 15:14   ` Thomas Gleixner
2021-08-13 18:36   ` Guenter Roeck
2021-08-06 10:07 ` [v2 PATCH 3/4] x86/kvm: Add host side support " Hikaru Nishida
2021-08-10 15:21   ` Thomas Gleixner
2021-08-10 15:24   ` Thomas Gleixner
2021-08-14  7:22   ` Paolo Bonzini
2021-08-18  9:32     ` Vitaly Kuznetsov
2021-08-18 20:49       ` Paolo Bonzini
2021-08-06 10:07 ` Hikaru Nishida [this message]
2021-08-10 15:48   ` [v2 PATCH 4/4] x86/kvm: Add guest " Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210806190607.v2.4.I2cbcd43256eacc3c92274adff6d0458b6a9c15ee@changeid \
    --to=hikalium@chromium.org \
    --cc=bp@alien8.de \
    --cc=dme@dme.org \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=jmattson@google.com \
    --cc=john.stultz@linaro.org \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=laijs@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mike.travis@hpe.com \
    --cc=mingo@redhat.com \
    --cc=mlevitsk@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=sboyd@kernel.org \
    --cc=seanjc@google.com \
    --cc=suleiman@google.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).