All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Woodhouse <dwmw2@infradead.org>
To: kvm@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	Ankur Arora <ankur.a.arora@oracle.com>,
	Joao Martins <joao.m.martins@oracle.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Sean Christopherson <seanjc@google.com>
Subject: [PATCH 14/15] KVM: x86/xen: register runstate info
Date: Fri,  4 Dec 2020 01:18:47 +0000	[thread overview]
Message-ID: <20201204011848.2967588-15-dwmw2@infradead.org> (raw)
In-Reply-To: <20201204011848.2967588-1-dwmw2@infradead.org>

From: Joao Martins <joao.m.martins@oracle.com>

Allow the emulator to register vCPU runstate areas, which Xen guests
use for steal clock accounting. The 'preempted' state of the KVM steal
clock equates to the 'runnable' state, 'running' has a similar meaning
for both, and 'offline' is used when the system admin needs to bring a
vCPU offline or hotplug it.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/kvm_host.h |   5 ++
 arch/x86/kvm/x86.c              |  10 +++
 arch/x86/kvm/xen.c              | 123 +++++++++++++++++++++++++++++++-
 arch/x86/kvm/xen.h              |   9 ++-
 include/uapi/linux/kvm.h        |   1 +
 5 files changed, 144 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ec9425289209..d8716ef27728 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -527,6 +527,11 @@ struct kvm_vcpu_xen {
 	struct vcpu_info *vcpu_info;
 	struct kvm_host_map pv_time_map;
 	struct pvclock_vcpu_time_info *pv_time;
+	struct kvm_host_map runstate_map;
+	void *runstate;
+	uint32_t current_runstate;
+	uint64_t last_steal;
+	uint64_t last_state_ns;
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a15748e3aa8..17ae827ae8cc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2935,6 +2935,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	struct kvm_host_map map;
 	struct kvm_steal_time *st;
 
+	if (vcpu->arch.xen.runstate) {
+		kvm_xen_setup_runstate_page(vcpu);
+		return;
+	}
+
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
@@ -3962,6 +3967,11 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	struct kvm_host_map map;
 	struct kvm_steal_time *st;
 
+	if (vcpu->arch.xen.runstate) {
+		kvm_xen_runstate_set_preempted(vcpu);
+		return;
+	}
+
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 5c67d9038651..e49e59f93828 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -11,9 +11,11 @@
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
 
 #include <trace/events/kvm.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
 
 #include "trace.h"
 
@@ -131,6 +133,98 @@ static void kvm_xen_update_vcpu_time(struct kvm_vcpu *v,
 	guest_hv_clock->version = vcpu->hv_clock.version;
 }
 
+static void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state, u64 steal_ns)
+{
+	struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+	struct compat_vcpu_runstate_info *runstate;
+	u32 *runstate_state;
+	u64 now, delta;
+
+	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+		     offsetof(struct compat_vcpu_runstate_info, state));
+	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+		     sizeof(((struct compat_vcpu_runstate_info *)0)->state));
+	/* 'state' sits at the same offset in both layouts (checked above). */
+	runstate = vcpu_xen->runstate;
+	runstate_state = &runstate->state;
+
+#ifdef CONFIG_64BIT
+	/*
+	 * The only difference is the alignment of uint64_t in 32-bit.
+	 * So the first field 'state' is accessed via *runstate_state
+	 * which is unmodified, while the other fields are accessed
+	 * through 'runstate->' which we tweak here by adding 4.
+	 */
+	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+		     offsetof(struct compat_vcpu_runstate_info, time) + 4);
+	if (vcpu->kvm->arch.xen.long_mode)
+		runstate = ((void *)runstate) + 4;
+#endif
+	/*
+	 * Although it's called "state_entry_time" and explicitly documented
+	 * as being "the system time at which the VCPU was last scheduled to
+	 * run", Xen just treats it as a counter for HVM domains too.
+	 */
+	runstate->state_entry_time = XEN_RUNSTATE_UPDATE |
+		(runstate->state_entry_time + 1);
+	smp_wmb();
+
+	now = ktime_get_ns();
+	delta = now - vcpu_xen->last_state_ns - steal_ns;
+	/* Publish the new state; charge elapsed time (less steal) to the old. */
+	*runstate_state = state;
+	runstate->time[vcpu_xen->current_runstate] += delta;
+	if (steal_ns)
+		runstate->time[RUNSTATE_runnable] += steal_ns;
+	smp_wmb();
+	vcpu_xen->current_runstate = state;
+	vcpu_xen->last_state_ns = now;
+	/* All fields written: clear XEN_RUNSTATE_UPDATE, which was set above. */
+	runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+	smp_wmb();
+}
+
+/*
+ * Switch the vCPU's Xen runstate to RUNSTATE_runnable if vcpu->preempted
+ * is set, otherwise to RUNSTATE_blocked. In the blocked case the current
+ * scheduler run_delay is saved in last_steal for later steal accounting.
+ */
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+	int new_state;
+
+	if (vcpu->preempted) {
+		new_state = RUNSTATE_runnable;
+	} else {
+		new_state = RUNSTATE_blocked;
+		vcpu_xen->last_steal = current->sched_info.run_delay;
+	}
+
+	/* No steal time is passed on this transition, hence 0. */
+	kvm_xen_update_runstate(vcpu, new_state, 0);
+}
+
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_xen *xen = vcpu_to_xen_vcpu(vcpu);
+	u64 runnable_ns = 0;
+
+	/*
+	 * A vCPU last recorded as blocked must have been woken before it
+	 * could be run again. The run_delay the scheduler accumulated
+	 * since then was spent runnable rather than blocked, so pass it
+	 * on as steal time to be taken off the blocked interval.
+	 */
+	if (xen->current_runstate == RUNSTATE_blocked)
+		runnable_ns = current->sched_info.run_delay - xen->last_steal;
+
+	kvm_xen_update_runstate(vcpu, RUNSTATE_running, runnable_ns);
+}
+
 void kvm_xen_setup_pvclock_page(struct kvm_vcpu *v)
 {
 	struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(v);
@@ -167,6 +261,15 @@ static int vcpu_attr_loc(struct kvm_vcpu *vcpu, u16 type,
 		if (sz)
 			*sz = sizeof(struct pvclock_vcpu_time_info);
 		return 0;
+
+	case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+		*map = &vcpu->arch.xen.runstate_map;
+		*hva = (void **)&vcpu->arch.xen.runstate;
+		if (sz)	/* layout, and thus size, depends on the guest ABI */
+			*sz = vcpu->kvm->arch.xen.long_mode ?
+				sizeof(struct vcpu_runstate_info) :
+				sizeof(struct compat_vcpu_runstate_info);
+		return 0;
 	}
 	return -EINVAL;
 }
@@ -191,6 +294,10 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		break;
 	}
 
+	case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
+		if (unlikely(!sched_info_on()))
+			return -EOPNOTSUPP; /* needs sched_info.run_delay */
+		fallthrough;
 	case KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO:
 	case KVM_XEN_ATTR_TYPE_VCPU_INFO: {
 		gpa_t gpa = data->u.vcpu_attr.gpa;
@@ -208,9 +315,13 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 			return r;
 
 		r = kvm_xen_map_guest_page(kvm, map, hva, gpa, sz);
-		if (!r)
-			kvm_xen_setup_pvclock_page(v);
-
+		if (!r) {
+			if (data->type == KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE) {
+				v->arch.xen.current_runstate = RUNSTATE_runnable;
+				v->arch.xen.last_state_ns = ktime_get_ns();
+			} else
+				kvm_xen_setup_pvclock_page(v);
+		}
 		break;
 	}
 
@@ -239,6 +350,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		break;
 	}
 
+	case KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE:
 	case KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO:
 	case KVM_XEN_ATTR_TYPE_VCPU_INFO: {
 		struct kvm_host_map *map;
@@ -414,6 +526,11 @@ void kvm_xen_vcpu_uninit(struct kvm_vcpu *vcpu)
 			      NULL, true, false);
 		vcpu_xen->pv_time = NULL;
 	}
+	if (vcpu_xen->runstate) {
+		kvm_unmap_gfn(vcpu->kvm, &vcpu_xen->runstate_map,
+			      NULL, true, false);
+		vcpu_xen->runstate = NULL;
+	}
 }
 
 void kvm_xen_destroy_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index 6d09b46d3c2e..42a9cc9f49a4 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -23,6 +23,8 @@ static inline struct kvm_vcpu *xen_vcpu_to_vcpu(struct kvm_vcpu_xen *xen_vcpu)
 }
 
 void kvm_xen_setup_pvclock_page(struct kvm_vcpu *vcpu);
+void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
+void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
@@ -70,7 +72,12 @@ struct compat_shared_info {
 	uint32_t evtchn_mask[sizeof(compat_ulong_t) * 8];
 	struct pvclock_wall_clock wc;
 	struct compat_arch_shared_info arch;
-
 };
 
+struct compat_vcpu_runstate_info {
+	int state;			/* RUNSTATE_* value */
+	uint64_t state_entry_time;	/* at offset 4: packed, no padding */
+	uint64_t time[4];		/* indexed by RUNSTATE_* */
+} __attribute__((packed));
+
 #endif /* __ARCH_X86_KVM_XEN_H__ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8a1914a9e206..cb2777c37ae5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1599,6 +1599,7 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_ATTR_TYPE_SHARED_INFO		0x1
 #define KVM_XEN_ATTR_TYPE_VCPU_INFO		0x2
 #define KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO	0x3
+#define KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE		0x4
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
-- 
2.26.2


  parent reply	other threads:[~2020-12-04  1:21 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-04  1:18 [PATCH 00/15] KVM: Add Xen hypercall and shared info pages David Woodhouse
2020-12-04  1:18 ` [PATCH 01/15] KVM: Fix arguments to kvm_{un,}map_gfn() David Woodhouse
2020-12-04 18:27   ` Alexander Graf
2020-12-04 19:02     ` David Woodhouse
2020-12-04  1:18 ` [PATCH 02/15] KVM: x86/xen: fix Xen hypercall page msr handling David Woodhouse
2020-12-04 18:26   ` Alexander Graf
2020-12-04 18:54     ` David Woodhouse
2020-12-04  1:18 ` [PATCH 03/15] KVM: x86/xen: intercept xen hypercalls if enabled David Woodhouse
2020-12-04 18:26   ` Alexander Graf
2020-12-04 18:58     ` David Woodhouse
2020-12-05 18:42   ` Joao Martins
2020-12-05 18:51     ` David Woodhouse
2020-12-05 19:13       ` Joao Martins
2020-12-04  1:18 ` [PATCH 04/15] KVM: x86/xen: Fix coexistence of Xen and Hyper-V hypercalls David Woodhouse
2020-12-04 18:34   ` Alexander Graf
2020-12-04 19:04     ` David Woodhouse
2020-12-04  1:18 ` [PATCH 05/15] KVM: x86/xen: add KVM_XEN_HVM_SET_ATTR/KVM_XEN_HVM_GET_ATTR David Woodhouse
2020-12-04  1:18 ` [PATCH 06/15] KVM: x86/xen: latch long_mode when hypercall page is set up David Woodhouse
2020-12-04 18:38   ` Alexander Graf
2020-12-04 19:08     ` David Woodhouse
2020-12-04  1:18 ` [PATCH 07/15] KVM: x86/xen: add definitions of compat_shared_info, compat_vcpu_info David Woodhouse
2020-12-05 18:43   ` Joao Martins
2020-12-05 19:48     ` David Woodhouse
2020-12-04  1:18 ` [PATCH 08/15] KVM: x86/xen: register shared_info page David Woodhouse
2020-12-04  1:18 ` [PATCH 09/15] KVM: x86/xen: setup pvclock updates David Woodhouse
2020-12-04  1:18 ` [PATCH 10/15] xen: add wc_sec_hi to struct shared_info David Woodhouse
2020-12-04  1:18 ` [PATCH 11/15] KVM: x86/xen: update wallclock region David Woodhouse
2020-12-04  1:18 ` [PATCH 12/15] KVM: x86/xen: register vcpu info David Woodhouse
2020-12-04  1:18 ` [PATCH 13/15] KVM: x86/xen: register vcpu time info region David Woodhouse
2020-12-04  1:18 ` David Woodhouse [this message]
2020-12-04  1:18 ` [PATCH 15/15] KVM: x86: declare Xen HVM shared info capability and add test case David Woodhouse
2020-12-04  9:11 ` [PATCH 16/15] KVM: Add documentation for Xen hypercall and shared_info updates David Woodhouse
2020-12-05 10:48 ` [PATCH 00/15] KVM: Add Xen hypercall and shared info pages David Woodhouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201204011848.2967588-15-dwmw2@infradead.org \
    --to=dwmw2@infradead.org \
    --cc=ankur.a.arora@oracle.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.