All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH  0/2] Fix VMX preemption timer migration
@ 2020-05-18 20:15 Makarand Sonare
  2020-05-18 20:15 ` [PATCH 1/2] KVM: nVMX: " Makarand Sonare
  2020-05-18 20:16 ` [PATCH 2/2] KVM: selftests: VMX preemption timer migration test Makarand Sonare
  0 siblings, 2 replies; 8+ messages in thread
From: Makarand Sonare @ 2020-05-18 20:15 UTC (permalink / raw)
  To: kvm, pshier, jmattson; +Cc: Makarand Sonare

Fix VMX preemption timer migration. Add a selftest to ensure post migration
both L1 and L2 VM observe the VMX preemption timer exit close to the original
expiration deadline.

Makarand Sonare (1):
  KVM: selftests: VMX preemption timer migration test

Peter Shier (1):
  KVM: nVMX: Fix VMX preemption timer migration

 Documentation/virt/kvm/api.rst                |   4 +
 arch/x86/include/uapi/asm/kvm.h               |   2 +
 arch/x86/kvm/vmx/nested.c                     |  61 ++++-
 arch/x86/kvm/vmx/vmx.h                        |   1 +
 arch/x86/kvm/x86.c                            |   3 +-
 include/uapi/linux/kvm.h                      |   1 +
 tools/arch/x86/include/uapi/asm/kvm.h         |   3 +
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../testing/selftests/kvm/include/kvm_util.h  |   2 +
 .../selftests/kvm/include/x86_64/processor.h  |  11 +-
 .../selftests/kvm/include/x86_64/vmx.h        |  27 ++
 .../kvm/x86_64/vmx_preemption_timer_test.c    | 256 ++++++++++++++++++
 13 files changed, 362 insertions(+), 11 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c

--
2.26.2.761.g0e0b3e54be-goog


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH  1/2] KVM: nVMX: Fix VMX preemption timer migration
  2020-05-18 20:15 [PATCH 0/2] Fix VMX preemption timer migration Makarand Sonare
@ 2020-05-18 20:15 ` Makarand Sonare
  2020-05-19  0:49   ` Vitaly Kuznetsov
  2020-05-19  7:18   ` Krish Sadhukhan
  2020-05-18 20:16 ` [PATCH 2/2] KVM: selftests: VMX preemption timer migration test Makarand Sonare
  1 sibling, 2 replies; 8+ messages in thread
From: Makarand Sonare @ 2020-05-18 20:15 UTC (permalink / raw)
  To: kvm, pshier, jmattson; +Cc: Makarand Sonare

From: Peter Shier <pshier@google.com>

Add new field to hold preemption timer expiration deadline
appended to struct kvm_vmx_nested_state_data. This is to prevent
the first VM-Enter after migration from incorrectly restarting the timer
with the full timer value instead of partially decayed timer value.
KVM_SET_NESTED_STATE restarts timer using migrated state regardless
of whether L1 sets VM_EXIT_SAVE_VMX_PREEMPTION_TIMER.

Fixes: cf8b84f48a593 ("kvm: nVMX: Prepare for checkpointing L2 state")

Signed-off-by: Peter Shier <pshier@google.com>
Signed-off-by: Makarand Sonare <makarandsonare@google.com>
Change-Id: I6446aba5a547afa667f0ef4620b1b76115bf3753
---
 Documentation/virt/kvm/api.rst        |  4 ++
 arch/x86/include/uapi/asm/kvm.h       |  2 +
 arch/x86/kvm/vmx/nested.c             | 61 ++++++++++++++++++++++++---
 arch/x86/kvm/vmx/vmx.h                |  1 +
 arch/x86/kvm/x86.c                    |  3 +-
 include/uapi/linux/kvm.h              |  1 +
 tools/arch/x86/include/uapi/asm/kvm.h |  2 +
 7 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index d871dacb984e9..b410815772970 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4326,6 +4326,9 @@ Errors:
   #define KVM_STATE_NESTED_RUN_PENDING		0x00000002
   #define KVM_STATE_NESTED_EVMCS		0x00000004
 
+  /* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
+  #define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
+
   #define KVM_STATE_NESTED_FORMAT_VMX		0
   #define KVM_STATE_NESTED_FORMAT_SVM		1
 
@@ -4346,6 +4349,7 @@ Errors:
   struct kvm_vmx_nested_state_data {
 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+	__u64 preemption_timer_deadline;
   };
 
 This ioctl copies the vcpu's nested virtualization state from the kernel to
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c650..20d5832bab215 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -391,6 +391,7 @@ struct kvm_sync_regs {
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
 #define KVM_STATE_NESTED_EVMCS		0x00000004
 #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
+#define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
@@ -400,6 +401,7 @@ struct kvm_sync_regs {
 struct kvm_vmx_nested_state_data {
 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+	__u64 preemption_timer_deadline;
 };
 
 struct kvm_vmx_nested_state_hdr {
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 51ebb60e1533a..badb82a39ac04 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2092,9 +2092,9 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
+					u64 preemption_timeout)
 {
-	u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	/*
@@ -3353,8 +3353,24 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 	 * the timer.
 	 */
 	vmx->nested.preemption_timer_expired = false;
-	if (nested_cpu_has_preemption_timer(vmcs12))
-		vmx_start_preemption_timer(vcpu);
+	if (nested_cpu_has_preemption_timer(vmcs12)) {
+		u64 timer_value;
+		u64 l1_tsc_value = kvm_read_l1_tsc(vcpu, rdtsc());
+
+		if (from_vmentry) {
+			timer_value = vmcs12->vmx_preemption_timer_value;
+			vmx->nested.preemption_timer_deadline = timer_value +
+				(l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);
+		} else {
+			if ((l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE) >
+			    vmx->nested.preemption_timer_deadline)
+				timer_value = 0;
+			else
+				timer_value = vmx->nested.preemption_timer_deadline -
+					(l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);
+		}
+		vmx_start_preemption_timer(vcpu, timer_value);
+	}
 
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -3962,9 +3978,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
 	if (nested_cpu_has_preemption_timer(vmcs12) &&
-	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+	    !vmx->nested.nested_run_pending) {
+		if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
 			vmcs12->vmx_preemption_timer_value =
 				vmx_get_preemption_timer_value(vcpu);
+	}
 
 	/*
 	 * In some cases (usually, nested EPT), L2 is allowed to change its
@@ -5939,6 +5957,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 
 			if (vmx->nested.mtf_pending)
 				kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
+
+			if (nested_cpu_has_preemption_timer(vmcs12)) {
+				kvm_state.flags |=
+					KVM_STATE_NESTED_PREEMPTION_TIMER;
+				kvm_state.size = offsetof(struct kvm_nested_state, data.vmx) +
+						 offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_deadline);
+			}
 		}
 	}
 
@@ -5970,6 +5995,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 
 	BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
 	BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
+	BUILD_BUG_ON(sizeof(user_vmx_nested_state->preemption_timer_deadline)
+		    != sizeof(vmx->nested.preemption_timer_deadline));
+
 
 	/*
 	 * Copy over the full allocated size of vmcs12 rather than just the size
@@ -5985,6 +6013,12 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 			return -EFAULT;
 	}
 
+	if (kvm_state.flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
+		if (put_user(vmx->nested.preemption_timer_deadline,
+			     &user_vmx_nested_state->preemption_timer_deadline))
+			return -EFAULT;
+	}
+
 out:
 	return kvm_state.size;
 }
@@ -6056,7 +6090,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	 */
 	if (is_smm(vcpu) ?
 		(kvm_state->flags &
-		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
+		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING |
+		  KVM_STATE_NESTED_PREEMPTION_TIMER))
 		: kvm_state->hdr.vmx.smm.flags)
 		return -EINVAL;
 
@@ -6146,6 +6181,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 			goto error_guest_mode;
 	}
 
+	if (kvm_state->flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
+
+		if (kvm_state->size <
+		    offsetof(struct kvm_nested_state, data.vmx) +
+		    offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_deadline))
+			goto error_guest_mode;
+
+		if (get_user(vmx->nested.preemption_timer_deadline,
+			     &user_vmx_nested_state->preemption_timer_deadline)) {
+			ret = -EFAULT;
+			goto error_guest_mode;
+		}
+	}
+
 	if (nested_vmx_check_controls(vcpu, vmcs12) ||
 	    nested_vmx_check_host_state(vcpu, vmcs12) ||
 	    nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 298ddef79d009..db697400755fb 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -169,6 +169,7 @@ struct nested_vmx {
 	u16 posted_intr_nv;
 
 	struct hrtimer preemption_timer;
+	u64 preemption_timer_deadline;
 	bool preemption_timer_expired;
 
 	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 471fccf7f8501..ba9e62ffbb4cd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3418,6 +3418,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MSR_PLATFORM_INFO:
 	case KVM_CAP_EXCEPTION_PAYLOAD:
 	case KVM_CAP_SET_GUEST_DEBUG:
+	case KVM_CAP_NESTED_STATE_PREEMPTION_TIMER:
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -4626,7 +4627,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 
 		if (kvm_state.flags &
 		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
-		      | KVM_STATE_NESTED_EVMCS))
+		      | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_PREEMPTION_TIMER))
 			break;
 
 		/* nested_run_pending implies guest_mode.  */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ac9eba0289d1b..0868dce12a715 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1018,6 +1018,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_PROTECTED 180
 #define KVM_CAP_PPC_SECURE_GUEST 181
 #define KVM_CAP_HALT_POLL 182
+#define KVM_CAP_NESTED_STATE_PREEMPTION_TIMER 183
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c650..60701178b9cc1 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -391,6 +391,8 @@ struct kvm_sync_regs {
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
 #define KVM_STATE_NESTED_EVMCS		0x00000004
 #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
+/* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
+#define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
-- 
2.26.2.761.g0e0b3e54be-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH  2/2] KVM: selftests: VMX preemption timer migration test
  2020-05-18 20:15 [PATCH 0/2] Fix VMX preemption timer migration Makarand Sonare
  2020-05-18 20:15 ` [PATCH 1/2] KVM: nVMX: " Makarand Sonare
@ 2020-05-18 20:16 ` Makarand Sonare
  2020-05-19  0:56   ` Vitaly Kuznetsov
  2020-05-20  2:14   ` Krish Sadhukhan
  1 sibling, 2 replies; 8+ messages in thread
From: Makarand Sonare @ 2020-05-18 20:16 UTC (permalink / raw)
  To: kvm, pshier, jmattson; +Cc: Makarand Sonare

When a nested VM with a VMX-preemption timer is migrated, verify that the
nested VM and its parent VM observe the VMX-preemption timer exit close to
the original expiration deadline.

Signed-off-by: Makarand Sonare <makarandsonare@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Change-Id: Ia79337c682ee161399525edc34201fad473fc190
---
 tools/arch/x86/include/uapi/asm/kvm.h         |   1 +
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../testing/selftests/kvm/include/kvm_util.h  |   2 +
 .../selftests/kvm/include/x86_64/processor.h  |  11 +-
 .../selftests/kvm/include/x86_64/vmx.h        |  27 ++
 .../kvm/x86_64/vmx_preemption_timer_test.c    | 256 ++++++++++++++++++
 7 files changed, 295 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c

diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 60701178b9cc1..13dca545554dc 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -402,6 +402,7 @@ struct kvm_sync_regs {
 struct kvm_vmx_nested_state_data {
 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+	__u64 preemption_timer_deadline;
 };
 
 struct kvm_vmx_nested_state_hdr {
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 222e50104296a..f159718f90c0a 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -10,6 +10,7 @@
 /x86_64/set_sregs_test
 /x86_64/smm_test
 /x86_64/state_test
+/x86_64/vmx_preemption_timer_test
 /x86_64/svm_vmcall_test
 /x86_64/sync_regs_test
 /x86_64/vmx_close_while_nested_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index c66f4eec34111..780f0c189a7bc 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -20,6 +20,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
 TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index e244c6ecfc1d5..919e161dd2893 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -314,6 +314,8 @@ void ucall_uninit(struct kvm_vm *vm);
 void ucall(uint64_t cmd, int nargs, ...);
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
 
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
+				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
 #define GUEST_DONE()		ucall(UCALL_DONE, 0)
 #define __GUEST_ASSERT(_condition, _nargs, _args...) do {	\
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 7428513a4c687..7cb19eca6c72d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct desc64 *desc)
 static inline uint64_t rdtsc(void)
 {
 	uint32_t eax, edx;
-
+	uint32_t tsc_val;
 	/*
 	 * The lfence is to wait (on Intel CPUs) until all previous
-	 * instructions have been executed.
+	 * instructions have been executed. If software requires RDTSC to be
+	 * executed prior to execution of any subsequent instruction, it can
+	 * execute LFENCE immediately after RDTSC
 	 */
-	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
-	return ((uint64_t)edx) << 32 | eax;
+	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+	tsc_val = ((uint64_t)edx) << 32 | eax;
+	return tsc_val;
 }
 
 static inline uint64_t rdtscp(uint32_t *aux)
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 3d27069b9ed9c..ccff3e6e27048 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -575,6 +575,33 @@ struct vmx_pages {
 	void *eptp;
 };
 
+union vmx_basic {
+	u64 val;
+	struct {
+		u32 revision;
+		u32	size:13,
+			reserved1:3,
+			width:1,
+			dual:1,
+			type:4,
+			insouts:1,
+			ctrl:1,
+			vm_entry_exception_ctrl:1,
+			reserved2:7;
+	};
+};
+
+union vmx_ctrl_msr {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+};
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
 struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
 bool prepare_for_vmx_operation(struct vmx_pages *vmx);
 void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
new file mode 100644
index 0000000000000..10893b11511be
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure the VM-Enter after migration doesn't
+ * incorrectly restarts the timer with the full timer
+ * value instead of partially decayed timer value
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID		5
+#define PREEMPTION_TIMER_VALUE			100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1	 80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+void l2_guest_code(void)
+{
+	u64 vmx_pt_delta;
+
+	vmcall();
+	l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+	//
+	// Wait until the 1st threshold has passed
+	//
+	do {
+		l2_vmx_pt_finish = rdtsc();
+		vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+				vmx_pt_rate;
+	} while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+	//
+	// Force L2 through Save and Restore cycle
+	//
+	GUEST_SYNC(1);
+
+	l2_save_restore_done = 1;
+
+	//
+	// Now wait for the preemption timer to fire and
+	// exit to L1
+	//
+	while ((l2_vmx_pt_finish = rdtsc()))
+		;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	u64 l1_vmx_pt_start;
+	u64 l1_vmx_pt_finish;
+	u64 l1_tsc_deadline, l2_tsc_deadline;
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
+
+	//
+	// Check for Preemption timer support and turn on PIN control
+	//
+	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+			: MSR_IA32_VMX_PINBASED_CTLS);
+	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+			: MSR_IA32_VMX_EXIT_CTLS);
+
+	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+		return;
+
+	GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+			      vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+			      PIN_BASED_VMX_PREEMPTION_TIMER));
+
+	GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+			      PREEMPTION_TIMER_VALUE));
+
+	vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
+
+	l2_save_restore_done = 0;
+
+	l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+	GUEST_ASSERT(!vmresume());
+
+	l1_vmx_pt_finish = rdtsc();
+
+	//
+	// Ensure exit from L2 happens after L2 goes through
+	// save and restore
+	GUEST_ASSERT(l2_save_restore_done);
+
+	//
+	// Ensure the exit from L2 is due to preemption timer expiry
+	//
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+	l1_tsc_deadline = l1_vmx_pt_start +
+		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+	l2_tsc_deadline = l2_vmx_pt_start +
+		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+	//
+	// Sync with the host and pass the l1|l2 pt_expiry_finish times and
+	// tsc deadlines so that host can verify they are as expected
+	//
+	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+		l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	if (vmx_pages)
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+
+	/*
+	 * AMD currently does not implement any VMX features, so for now we
+	 * just early out.
+	 */
+	nested_vmx_check_supported();
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		vcpu_alloc_vmx(vm, &vmx_pages_gva);
+		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+	} else {
+		pr_info("will skip nested state checks\n");
+		goto done;
+	}
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				  __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+		//
+		// If this stage 2 then we should verify the vmx pt expiry
+		// is as expected
+		//
+		if (stage == 2) {
+
+			pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+				stage, uc.args[2], uc.args[3]);
+
+			pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+				stage, uc.args[4], uc.args[5]);
+
+			//
+			// From L1's perspective verify Preemption timer hasn't
+			// expired too early
+			//
+
+			TEST_ASSERT(uc.args[2] >= uc.args[3],
+				"Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+				stage, uc.args[2], uc.args[3]);
+
+			//
+			// From L2's perspective verify Preemption timer hasn't
+			// expired too late
+			//
+			TEST_ASSERT(uc.args[4] < uc.args[5],
+				"Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+				stage, uc.args[4], uc.args[5]);
+		}
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
-- 
2.26.2.761.g0e0b3e54be-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH  1/2] KVM: nVMX: Fix VMX preemption timer migration
  2020-05-18 20:15 ` [PATCH 1/2] KVM: nVMX: " Makarand Sonare
@ 2020-05-19  0:49   ` Vitaly Kuznetsov
  2020-05-19  7:18   ` Krish Sadhukhan
  1 sibling, 0 replies; 8+ messages in thread
From: Vitaly Kuznetsov @ 2020-05-19  0:49 UTC (permalink / raw)
  To: Makarand Sonare; +Cc: kvm, pshier, jmattson

Makarand Sonare <makarandsonare@google.com> writes:

> From: Peter Shier <pshier@google.com>
>
> Add new field to hold preemption timer expiration deadline
> appended to struct kvm_vmx_nested_state_data. This is to prevent
> the first VM-Enter after migration from incorrectly restarting the timer
> with the full timer value instead of partially decayed timer value.
> KVM_SET_NESTED_STATE restarts timer using migrated state regardless
> of whether L1 sets VM_EXIT_SAVE_VMX_PREEMPTION_TIMER.
>
> Fixes: cf8b84f48a593 ("kvm: nVMX: Prepare for checkpointing L2 state")
>
> Signed-off-by: Peter Shier <pshier@google.com>
> Signed-off-by: Makarand Sonare <makarandsonare@google.com>
> Change-Id: I6446aba5a547afa667f0ef4620b1b76115bf3753
> ---
>  Documentation/virt/kvm/api.rst        |  4 ++
>  arch/x86/include/uapi/asm/kvm.h       |  2 +
>  arch/x86/kvm/vmx/nested.c             | 61 ++++++++++++++++++++++++---
>  arch/x86/kvm/vmx/vmx.h                |  1 +
>  arch/x86/kvm/x86.c                    |  3 +-
>  include/uapi/linux/kvm.h              |  1 +
>  tools/arch/x86/include/uapi/asm/kvm.h |  2 +
>  7 files changed, 67 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index d871dacb984e9..b410815772970 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4326,6 +4326,9 @@ Errors:
>    #define KVM_STATE_NESTED_RUN_PENDING		0x00000002
>    #define KVM_STATE_NESTED_EVMCS		0x00000004
>  

Not your fault but KVM_STATE_NESTED_MTF_PENDING seems to be missing here

> +  /* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
> +  #define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
> +
>    #define KVM_STATE_NESTED_FORMAT_VMX		0
>    #define KVM_STATE_NESTED_FORMAT_SVM		1
>  
> @@ -4346,6 +4349,7 @@ Errors:
>    struct kvm_vmx_nested_state_data {
>  	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>  	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> +	__u64 preemption_timer_deadline;
>    };
>  
>  This ioctl copies the vcpu's nested virtualization state from the kernel to
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index 3f3f780c8c650..20d5832bab215 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -391,6 +391,7 @@ struct kvm_sync_regs {
>  #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
>  #define KVM_STATE_NESTED_EVMCS		0x00000004
>  #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
> +#define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010

and here you don't have the "/* Available with
KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */" comment you put to api.rst. I
think it would be better to keep them in sync.

>  
>  #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
>  #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
> @@ -400,6 +401,7 @@ struct kvm_sync_regs {
>  struct kvm_vmx_nested_state_data {
>  	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>  	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> +	__u64 preemption_timer_deadline;
>  };
>  
>  struct kvm_vmx_nested_state_hdr {
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 51ebb60e1533a..badb82a39ac04 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2092,9 +2092,9 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
>  	return HRTIMER_NORESTART;
>  }
>  
> -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
> +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
> +					u64 preemption_timeout)
>  {
> -	u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  
>  	/*
> @@ -3353,8 +3353,24 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
>  	 * the timer.
>  	 */
>  	vmx->nested.preemption_timer_expired = false;
> -	if (nested_cpu_has_preemption_timer(vmcs12))
> -		vmx_start_preemption_timer(vcpu);
> +	if (nested_cpu_has_preemption_timer(vmcs12)) {
> +		u64 timer_value;
> +		u64 l1_tsc_value = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +		if (from_vmentry) {
> +			timer_value = vmcs12->vmx_preemption_timer_value;
> +			vmx->nested.preemption_timer_deadline = timer_value +
> +				(l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);
> +		} else {
> +			if ((l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE) >
> +			    vmx->nested.preemption_timer_deadline)
> +				timer_value = 0;
> +			else
> +				timer_value = vmx->nested.preemption_timer_deadline -
> +					(l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);
> +		}
> +		vmx_start_preemption_timer(vcpu, timer_value);
> +	}
>  
>  	/*
>  	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
> @@ -3962,9 +3978,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
>  
>  	if (nested_cpu_has_preemption_timer(vmcs12) &&
> -	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
> +	    !vmx->nested.nested_run_pending) {
> +		if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
>  			vmcs12->vmx_preemption_timer_value =
>  				vmx_get_preemption_timer_value(vcpu);
> +	}
>  
>  	/*
>  	 * In some cases (usually, nested EPT), L2 is allowed to change its
> @@ -5939,6 +5957,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>  
>  			if (vmx->nested.mtf_pending)
>  				kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
> +
> +			if (nested_cpu_has_preemption_timer(vmcs12)) {
> +				kvm_state.flags |=
> +					KVM_STATE_NESTED_PREEMPTION_TIMER;
> +				kvm_state.size = offsetof(struct kvm_nested_state, data.vmx) +
> +						 offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_deadline);

Hm, here we seem to drop all previous calculations for the
kvm_state.size (like if shadow vmcs was present or not). I think this
should just be

 kvm_state.size +=  sizeof(user_vmx_nested_state.preemption_timer_deadline);

instead.

> +			}
>  		}
>  	}
>  
> @@ -5970,6 +5995,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>  
>  	BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
>  	BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
> +	BUILD_BUG_ON(sizeof(user_vmx_nested_state->preemption_timer_deadline)
> +		    != sizeof(vmx->nested.preemption_timer_deadline));
> +
>  
>  	/*
>  	 * Copy over the full allocated size of vmcs12 rather than just the size
> @@ -5985,6 +6013,12 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>  			return -EFAULT;
>  	}
>  
> +	if (kvm_state.flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> +		if (put_user(vmx->nested.preemption_timer_deadline,
> +			     &user_vmx_nested_state->preemption_timer_deadline))
> +			return -EFAULT;
> +	}
> +
>  out:
>  	return kvm_state.size;
>  }
> @@ -6056,7 +6090,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
>  	 */
>  	if (is_smm(vcpu) ?
>  		(kvm_state->flags &
> -		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
> +		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING |
> +		  KVM_STATE_NESTED_PREEMPTION_TIMER))
>  		: kvm_state->hdr.vmx.smm.flags)
>  		return -EINVAL;
>  
> @@ -6146,6 +6181,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
>  			goto error_guest_mode;
>  	}
>  
> +	if (kvm_state->flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> +
> +		if (kvm_state->size <
> +		    offsetof(struct kvm_nested_state, data.vmx) +
> +		    offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_deadline))

This also doesn't seem to be correct. E.g. what if there is no shadow
vmcs data in the saved state?

> +			goto error_guest_mode;
> +
> +		if (get_user(vmx->nested.preemption_timer_deadline,
> +			     &user_vmx_nested_state->preemption_timer_deadline)) {
> +			ret = -EFAULT;
> +			goto error_guest_mode;
> +		}
> +	}
> +
>  	if (nested_vmx_check_controls(vcpu, vmcs12) ||
>  	    nested_vmx_check_host_state(vcpu, vmcs12) ||
>  	    nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 298ddef79d009..db697400755fb 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -169,6 +169,7 @@ struct nested_vmx {
>  	u16 posted_intr_nv;
>  
>  	struct hrtimer preemption_timer;
> +	u64 preemption_timer_deadline;
>  	bool preemption_timer_expired;
>  
>  	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 471fccf7f8501..ba9e62ffbb4cd 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3418,6 +3418,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_MSR_PLATFORM_INFO:
>  	case KVM_CAP_EXCEPTION_PAYLOAD:
>  	case KVM_CAP_SET_GUEST_DEBUG:
> +	case KVM_CAP_NESTED_STATE_PREEMPTION_TIMER:
>  		r = 1;
>  		break;
>  	case KVM_CAP_SYNC_REGS:
> @@ -4626,7 +4627,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>  
>  		if (kvm_state.flags &
>  		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
> -		      | KVM_STATE_NESTED_EVMCS))
> +		      | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_PREEMPTION_TIMER))
>  			break;
>  
>  		/* nested_run_pending implies guest_mode.  */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index ac9eba0289d1b..0868dce12a715 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1018,6 +1018,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_S390_PROTECTED 180
>  #define KVM_CAP_PPC_SECURE_GUEST 181
>  #define KVM_CAP_HALT_POLL 182
> +#define KVM_CAP_NESTED_STATE_PREEMPTION_TIMER 183
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> index 3f3f780c8c650..60701178b9cc1 100644
> --- a/tools/arch/x86/include/uapi/asm/kvm.h
> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> @@ -391,6 +391,8 @@ struct kvm_sync_regs {
>  #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
>  #define KVM_STATE_NESTED_EVMCS		0x00000004
>  #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
> +/* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
> +#define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
>  
>  #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
>  #define KVM_STATE_NESTED_SMM_VMXON	0x00000002

It seems tools/arch/x86/include/uapi/asm/kvm.h is usually updated
separately (not sure it's the right thing to do though..)

-- 
Vitaly


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH  2/2] KVM: selftests: VMX preemption timer migration test
  2020-05-18 20:16 ` [PATCH 2/2] KVM: selftests: VMX preemption timer migration test Makarand Sonare
@ 2020-05-19  0:56   ` Vitaly Kuznetsov
  2020-05-20  2:14   ` Krish Sadhukhan
  1 sibling, 0 replies; 8+ messages in thread
From: Vitaly Kuznetsov @ 2020-05-19  0:56 UTC (permalink / raw)
  To: Makarand Sonare; +Cc: Makarand Sonare, kvm, pshier, jmattson

Makarand Sonare <makarandsonare@google.com> writes:

> When a nested VM with a VMX-preemption timer is migrated, verify that the
> nested VM and its parent VM observe the VMX-preemption timer exit close to
> the original expiration deadline.
>
> Signed-off-by: Makarand Sonare <makarandsonare@google.com>
> Reviewed-by: Jim Mattson <jmattson@google.com>
> Change-Id: Ia79337c682ee161399525edc34201fad473fc190
> ---
>  tools/arch/x86/include/uapi/asm/kvm.h         |   1 +
>  tools/testing/selftests/kvm/.gitignore        |   1 +
>  tools/testing/selftests/kvm/Makefile          |   1 +
>  .../testing/selftests/kvm/include/kvm_util.h  |   2 +
>  .../selftests/kvm/include/x86_64/processor.h  |  11 +-
>  .../selftests/kvm/include/x86_64/vmx.h        |  27 ++
>  .../kvm/x86_64/vmx_preemption_timer_test.c    | 256 ++++++++++++++++++
>  7 files changed, 295 insertions(+), 4 deletions(-)
>  create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
>
> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> index 60701178b9cc1..13dca545554dc 100644
> --- a/tools/arch/x86/include/uapi/asm/kvm.h
> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> @@ -402,6 +402,7 @@ struct kvm_sync_regs {
>  struct kvm_vmx_nested_state_data {
>  	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>  	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> +	__u64 preemption_timer_deadline;
>  };
>  
>  struct kvm_vmx_nested_state_hdr {
> diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
> index 222e50104296a..f159718f90c0a 100644
> --- a/tools/testing/selftests/kvm/.gitignore
> +++ b/tools/testing/selftests/kvm/.gitignore
> @@ -10,6 +10,7 @@
>  /x86_64/set_sregs_test
>  /x86_64/smm_test
>  /x86_64/state_test
> +/x86_64/vmx_preemption_timer_test
>  /x86_64/svm_vmcall_test
>  /x86_64/sync_regs_test
>  /x86_64/vmx_close_while_nested_test
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index c66f4eec34111..780f0c189a7bc 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -20,6 +20,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
>  TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
>  TEST_GEN_PROGS_x86_64 += x86_64/smm_test
>  TEST_GEN_PROGS_x86_64 += x86_64/state_test
> +TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
>  TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
>  TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
>  TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
> index e244c6ecfc1d5..919e161dd2893 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
> @@ -314,6 +314,8 @@ void ucall_uninit(struct kvm_vm *vm);
>  void ucall(uint64_t cmd, int nargs, ...);
>  uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
>  
> +#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
> +				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
>  #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
>  #define GUEST_DONE()		ucall(UCALL_DONE, 0)
>  #define __GUEST_ASSERT(_condition, _nargs, _args...) do {	\
> diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
> index 7428513a4c687..7cb19eca6c72d 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/processor.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
> @@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct desc64 *desc)
>  static inline uint64_t rdtsc(void)
>  {
>  	uint32_t eax, edx;
> -
> +	uint32_t tsc_val;
>  	/*
>  	 * The lfence is to wait (on Intel CPUs) until all previous
> -	 * instructions have been executed.
> +	 * instructions have been executed. If software requires RDTSC to be
> +	 * executed prior to execution of any subsequent instruction, it can
> +	 * execute LFENCE immediately after RDTSC
>  	 */
> -	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
> -	return ((uint64_t)edx) << 32 | eax;
> +	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
> +	tsc_val = ((uint64_t)edx) << 32 | eax;
> +	return tsc_val;
>  }
>  
>  static inline uint64_t rdtscp(uint32_t *aux)
> diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
> index 3d27069b9ed9c..ccff3e6e27048 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
> @@ -575,6 +575,33 @@ struct vmx_pages {
>  	void *eptp;
>  };
>  
> +union vmx_basic {
> +	u64 val;
> +	struct {
> +		u32 revision;
> +		u32	size:13,
> +			reserved1:3,
> +			width:1,
> +			dual:1,
> +			type:4,
> +			insouts:1,
> +			ctrl:1,
> +			vm_entry_exception_ctrl:1,
> +			reserved2:7;
> +	};
> +};
> +
> +union vmx_ctrl_msr {
> +	u64 val;
> +	struct {
> +		u32 set, clr;
> +	};
> +};
> +
> +union vmx_basic basic;
> +union vmx_ctrl_msr ctrl_pin_rev;
> +union vmx_ctrl_msr ctrl_exit_rev;
> +
>  struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
>  bool prepare_for_vmx_operation(struct vmx_pages *vmx);
>  void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
> diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
> new file mode 100644
> index 0000000000000..10893b11511be
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
> @@ -0,0 +1,256 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * VMX-preemption timer test
> + *
> + * Copyright (C) 2020, Google, LLC.
> + *
> + * Test to ensure the VM-Enter after migration doesn't
> + * incorrectly restarts the timer with the full timer
> + * value instead of partially decayed timer value
> + *
> + */
> +#define _GNU_SOURCE /* for program_invocation_short_name */
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/ioctl.h>
> +
> +#include "test_util.h"
> +
> +#include "kvm_util.h"
> +#include "processor.h"
> +#include "vmx.h"
> +
> +#define VCPU_ID		5
> +#define PREEMPTION_TIMER_VALUE			100000000ull
> +#define PREEMPTION_TIMER_VALUE_THRESHOLD1	 80000000ull
> +
> +u32 vmx_pt_rate;
> +bool l2_save_restore_done;
> +static u64 l2_vmx_pt_start;
> +volatile u64 l2_vmx_pt_finish;
> +
> +void l2_guest_code(void)
> +{
> +	u64 vmx_pt_delta;
> +
> +	vmcall();
> +	l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
> +
> +	//
> +	// Wait until the 1st threshold has passed
> +	//
> +	do {
> +		l2_vmx_pt_finish = rdtsc();
> +		vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
> +				vmx_pt_rate;
> +	} while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
> +
> +	//
> +	// Force L2 through Save and Restore cycle
> +	//
> +	GUEST_SYNC(1);
> +
> +	l2_save_restore_done = 1;
> +
> +	//
> +	// Now wait for the preemption timer to fire and
> +	// exit to L1
> +	//
> +	while ((l2_vmx_pt_finish = rdtsc()))
> +		;
> +}
> +
> +void l1_guest_code(struct vmx_pages *vmx_pages)
> +{
> +#define L2_GUEST_STACK_SIZE 64
> +	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
> +	u64 l1_vmx_pt_start;
> +	u64 l1_vmx_pt_finish;
> +	u64 l1_tsc_deadline, l2_tsc_deadline;
> +
> +	GUEST_ASSERT(vmx_pages->vmcs_gpa);
> +	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
> +	GUEST_ASSERT(load_vmcs(vmx_pages));
> +	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
> +
> +	prepare_vmcs(vmx_pages, l2_guest_code,
> +		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
> +
> +	GUEST_ASSERT(!vmlaunch());
> +	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
> +	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
> +
> +	//
> +	// Check for Preemption timer support and turn on PIN control
> +	//
> +	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> +	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
> +			: MSR_IA32_VMX_PINBASED_CTLS);
> +	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
> +			: MSR_IA32_VMX_EXIT_CTLS);
> +
> +	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
> +	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> +		return;
> +
> +	GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
> +			      vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
> +			      PIN_BASED_VMX_PREEMPTION_TIMER));
> +
> +	GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
> +			      PREEMPTION_TIMER_VALUE));
> +
> +	vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
> +
> +	l2_save_restore_done = 0;
> +
> +	l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
> +
> +	GUEST_ASSERT(!vmresume());
> +
> +	l1_vmx_pt_finish = rdtsc();
> +
> +	//
> +	// Ensure exit from L2 happens after L2 goes through
> +	// save and restore
> +	GUEST_ASSERT(l2_save_restore_done);
> +
> +	//
> +	// Ensure the exit from L2 is due to preemption timer expiry
> +	//
> +	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
> +
> +	l1_tsc_deadline = l1_vmx_pt_start +
> +		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
> +
> +	l2_tsc_deadline = l2_vmx_pt_start +
> +		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
> +
> +	//
> +	// Sync with the host and pass the l1|l2 pt_expiry_finish times and
> +	// tsc deadlines so that host can verify they are as expected
> +	//
> +	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
> +		l2_vmx_pt_finish, l2_tsc_deadline);
> +}
> +
> +void guest_code(struct vmx_pages *vmx_pages)
> +{
> +	if (vmx_pages)
> +		l1_guest_code(vmx_pages);
> +
> +	GUEST_DONE();
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	vm_vaddr_t vmx_pages_gva = 0;
> +
> +	struct kvm_regs regs1, regs2;
> +	struct kvm_vm *vm;
> +	struct kvm_run *run;
> +	struct kvm_x86_state *state;
> +	struct ucall uc;
> +	int stage;
> +
> +	/*
> +	 * AMD currently does not implement any VMX features, so for now we
> +	 * just early out.
> +	 */
> +	nested_vmx_check_supported();
> +
> +	/* Create VM */
> +	vm = vm_create_default(VCPU_ID, 0, guest_code);
> +	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
> +	run = vcpu_state(vm, VCPU_ID);
> +
> +	vcpu_regs_get(vm, VCPU_ID, &regs1);
> +
> +	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {

You need to check the newly added KVM_CAP_NESTED_STATE_PREEMPTION_TIMER
instead or skip the whole test.

> +		vcpu_alloc_vmx(vm, &vmx_pages_gva);
> +		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
> +	} else {
> +		pr_info("will skip nested state checks\n");

Copy/paste from state_test.c detected :-)

> +		goto done;
> +	}
> +
> +	for (stage = 1;; stage++) {
> +		_vcpu_run(vm, VCPU_ID);
> +		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
> +			    "Stage %d: unexpected exit reason: %u (%s),\n",
> +			    stage, run->exit_reason,
> +			    exit_reason_str(run->exit_reason));
> +
> +		switch (get_ucall(vm, VCPU_ID, &uc)) {
> +		case UCALL_ABORT:
> +			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
> +				  __FILE__, uc.args[1]);
> +			/* NOT REACHED */
> +		case UCALL_SYNC:
> +			break;
> +		case UCALL_DONE:
> +			goto done;
> +		default:
> +			TEST_FAIL("Unknown ucall %lu", uc.cmd);
> +		}
> +
> +		/* UCALL_SYNC is handled here.  */
> +		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
> +			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
> +			    stage, (ulong)uc.args[1]);
> +		//
> +		// If this stage 2 then we should verify the vmx pt expiry
> +		// is as expected
> +		//

Nitpick: coding style requirements for selftests is definitely lower
than for KVM itself but could you please be consistent with comments and
use kernel style 
  /*
   * This is a comment.
   */
comments exclusively (you seem to have a mix)? Thanks!

> +		if (stage == 2) {
> +
> +			pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
> +				stage, uc.args[2], uc.args[3]);
> +
> +			pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
> +				stage, uc.args[4], uc.args[5]);
> +
> +			//
> +			// From L1's perspective verify Preemption timer hasn't
> +			// expired too early
> +			//
> +
> +			TEST_ASSERT(uc.args[2] >= uc.args[3],
> +				"Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
> +				stage, uc.args[2], uc.args[3]);
> +
> +			//
> +			// From L2's perspective verify Preemption timer hasn't
> +			// expired too late
> +			//
> +			TEST_ASSERT(uc.args[4] < uc.args[5],
> +				"Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
> +				stage, uc.args[4], uc.args[5]);
> +		}
> +
> +		state = vcpu_save_state(vm, VCPU_ID);
> +		memset(&regs1, 0, sizeof(regs1));
> +		vcpu_regs_get(vm, VCPU_ID, &regs1);
> +
> +		kvm_vm_release(vm);
> +
> +		/* Restore state in a new VM.  */
> +		kvm_vm_restart(vm, O_RDWR);
> +		vm_vcpu_add(vm, VCPU_ID);
> +		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
> +		vcpu_load_state(vm, VCPU_ID, state);
> +		run = vcpu_state(vm, VCPU_ID);
> +		free(state);
> +
> +		memset(&regs2, 0, sizeof(regs2));
> +		vcpu_regs_get(vm, VCPU_ID, &regs2);
> +		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
> +			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
> +			    (ulong) regs2.rdi, (ulong) regs2.rsi);
> +	}
> +
> +done:
> +	kvm_vm_free(vm);
> +}

-- 
Vitaly


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] KVM: nVMX: Fix VMX preemption timer migration
  2020-05-18 20:15 ` [PATCH 1/2] KVM: nVMX: " Makarand Sonare
  2020-05-19  0:49   ` Vitaly Kuznetsov
@ 2020-05-19  7:18   ` Krish Sadhukhan
  1 sibling, 0 replies; 8+ messages in thread
From: Krish Sadhukhan @ 2020-05-19  7:18 UTC (permalink / raw)
  To: Makarand Sonare, kvm, pshier, jmattson


On 5/18/20 1:15 PM, Makarand Sonare wrote:
> From: Peter Shier <pshier@google.com>
>
> Add new field to hold preemption timer expiration deadline
> appended to struct kvm_vmx_nested_state_data. This is to prevent
> the first VM-Enter after migration from incorrectly restarting the timer
> with the full timer value instead of partially decayed timer value.
> KVM_SET_NESTED_STATE restarts timer using migrated state regardless
> of whether L1 sets VM_EXIT_SAVE_VMX_PREEMPTION_TIMER.
>
> Fixes: cf8b84f48a593 ("kvm: nVMX: Prepare for checkpointing L2 state")
>
> Signed-off-by: Peter Shier <pshier@google.com>
> Signed-off-by: Makarand Sonare <makarandsonare@google.com>
> Change-Id: I6446aba5a547afa667f0ef4620b1b76115bf3753
> ---
>   Documentation/virt/kvm/api.rst        |  4 ++
>   arch/x86/include/uapi/asm/kvm.h       |  2 +
>   arch/x86/kvm/vmx/nested.c             | 61 ++++++++++++++++++++++++---
>   arch/x86/kvm/vmx/vmx.h                |  1 +
>   arch/x86/kvm/x86.c                    |  3 +-
>   include/uapi/linux/kvm.h              |  1 +
>   tools/arch/x86/include/uapi/asm/kvm.h |  2 +
>   7 files changed, 67 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index d871dacb984e9..b410815772970 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4326,6 +4326,9 @@ Errors:
>     #define KVM_STATE_NESTED_RUN_PENDING		0x00000002
>     #define KVM_STATE_NESTED_EVMCS		0x00000004
>   
> +  /* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
> +  #define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
> +
>     #define KVM_STATE_NESTED_FORMAT_VMX		0
>     #define KVM_STATE_NESTED_FORMAT_SVM		1
>   
> @@ -4346,6 +4349,7 @@ Errors:
>     struct kvm_vmx_nested_state_data {
>   	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>   	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> +	__u64 preemption_timer_deadline;
>     };
>   
>   This ioctl copies the vcpu's nested virtualization state from the kernel to
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index 3f3f780c8c650..20d5832bab215 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -391,6 +391,7 @@ struct kvm_sync_regs {
>   #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
>   #define KVM_STATE_NESTED_EVMCS		0x00000004
>   #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
> +#define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
>   
>   #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
>   #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
> @@ -400,6 +401,7 @@ struct kvm_sync_regs {
>   struct kvm_vmx_nested_state_data {
>   	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>   	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> +	__u64 preemption_timer_deadline;
>   };
>   
>   struct kvm_vmx_nested_state_hdr {
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 51ebb60e1533a..badb82a39ac04 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2092,9 +2092,9 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
>   	return HRTIMER_NORESTART;
>   }
>   
> -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
> +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
> +					u64 preemption_timeout)
>   {
> -	u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
>   	struct vcpu_vmx *vmx = to_vmx(vcpu);
>   
>   	/*
> @@ -3353,8 +3353,24 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
>   	 * the timer.
>   	 */
>   	vmx->nested.preemption_timer_expired = false;
> -	if (nested_cpu_has_preemption_timer(vmcs12))
> -		vmx_start_preemption_timer(vcpu);
> +	if (nested_cpu_has_preemption_timer(vmcs12)) {
> +		u64 timer_value;
> +		u64 l1_tsc_value = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +		if (from_vmentry) {
> +			timer_value = vmcs12->vmx_preemption_timer_value;
> +			vmx->nested.preemption_timer_deadline = timer_value +
> +				(l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);
> +		} else {
> +			if ((l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE) >
> +			    vmx->nested.preemption_timer_deadline)
> +				timer_value = 0;
> +			else
> +				timer_value = vmx->nested.preemption_timer_deadline -
> +					(l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);

A couple of code optimizations can be done here:

1. The inside if-else can be simplified by initializing 'timer_value' at 
the place where you have defined it:

     } else {
+            if (!((l1_tsc_value >> 
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE) >
+                vmx->nested.preemption_timer_deadline))
+                    timer_value = vmx->nested.preemption_timer_deadline -
+                        (l1_tsc_value >> 
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);

+  }


2. You can apply the right-shift operation on 'l1_tsc_value' right where 
you have defined it:

u64 l1_tsc_value = kvm_read_l1_tsc(vcpu, rdtsc()) >> 
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE

> +		}
> +		vmx_start_preemption_timer(vcpu, timer_value);
> +	}
>   
>   	/*
>   	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
> @@ -3962,9 +3978,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>   		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
>   
>   	if (nested_cpu_has_preemption_timer(vmcs12) &&
> -	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
> +	    !vmx->nested.nested_run_pending) {
> +		if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
>   			vmcs12->vmx_preemption_timer_value =
>   				vmx_get_preemption_timer_value(vcpu);
> +	}
>   
>   	/*
>   	 * In some cases (usually, nested EPT), L2 is allowed to change its
> @@ -5939,6 +5957,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>   
>   			if (vmx->nested.mtf_pending)
>   				kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
> +
> +			if (nested_cpu_has_preemption_timer(vmcs12)) {
> +				kvm_state.flags |=
> +					KVM_STATE_NESTED_PREEMPTION_TIMER;
> +				kvm_state.size = offsetof(struct kvm_nested_state, data.vmx) +


'size' is reset here instead of accumulating all previous sizes. Is that 
what is intended ?

> +						 offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_deadline);
> +			}
>   		}
>   	}
>   
> @@ -5970,6 +5995,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>   
>   	BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
>   	BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
> +	BUILD_BUG_ON(sizeof(user_vmx_nested_state->preemption_timer_deadline)
> +		    != sizeof(vmx->nested.preemption_timer_deadline));
> +
>   
>   	/*
>   	 * Copy over the full allocated size of vmcs12 rather than just the size
> @@ -5985,6 +6013,12 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>   			return -EFAULT;
>   	}
>   
> +	if (kvm_state.flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> +		if (put_user(vmx->nested.preemption_timer_deadline,
> +			     &user_vmx_nested_state->preemption_timer_deadline))
> +			return -EFAULT;
> +	}
> +
>   out:
>   	return kvm_state.size;
>   }
> @@ -6056,7 +6090,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
>   	 */
>   	if (is_smm(vcpu) ?
>   		(kvm_state->flags &
> -		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
> +		 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING |
> +		  KVM_STATE_NESTED_PREEMPTION_TIMER))
>   		: kvm_state->hdr.vmx.smm.flags)
>   		return -EINVAL;
>   
> @@ -6146,6 +6181,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
>   			goto error_guest_mode;
>   	}
>   
> +	if (kvm_state->flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> +
> +		if (kvm_state->size <
> +		    offsetof(struct kvm_nested_state, data.vmx) +
> +		    offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_deadline))
> +			goto error_guest_mode;
> +
> +		if (get_user(vmx->nested.preemption_timer_deadline,
> +			     &user_vmx_nested_state->preemption_timer_deadline)) {
> +			ret = -EFAULT;
> +			goto error_guest_mode;
> +		}
> +	}
> +
>   	if (nested_vmx_check_controls(vcpu, vmcs12) ||
>   	    nested_vmx_check_host_state(vcpu, vmcs12) ||
>   	    nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 298ddef79d009..db697400755fb 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -169,6 +169,7 @@ struct nested_vmx {
>   	u16 posted_intr_nv;
>   
>   	struct hrtimer preemption_timer;
> +	u64 preemption_timer_deadline;
>   	bool preemption_timer_expired;
>   
>   	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 471fccf7f8501..ba9e62ffbb4cd 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3418,6 +3418,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>   	case KVM_CAP_MSR_PLATFORM_INFO:
>   	case KVM_CAP_EXCEPTION_PAYLOAD:
>   	case KVM_CAP_SET_GUEST_DEBUG:
> +	case KVM_CAP_NESTED_STATE_PREEMPTION_TIMER:
>   		r = 1;
>   		break;
>   	case KVM_CAP_SYNC_REGS:
> @@ -4626,7 +4627,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>   
>   		if (kvm_state.flags &
>   		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
> -		      | KVM_STATE_NESTED_EVMCS))
> +		      | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_PREEMPTION_TIMER))
>   			break;
>   
>   		/* nested_run_pending implies guest_mode.  */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index ac9eba0289d1b..0868dce12a715 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1018,6 +1018,7 @@ struct kvm_ppc_resize_hpt {
>   #define KVM_CAP_S390_PROTECTED 180
>   #define KVM_CAP_PPC_SECURE_GUEST 181
>   #define KVM_CAP_HALT_POLL 182
> +#define KVM_CAP_NESTED_STATE_PREEMPTION_TIMER 183
>   
>   #ifdef KVM_CAP_IRQ_ROUTING
>   
> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> index 3f3f780c8c650..60701178b9cc1 100644
> --- a/tools/arch/x86/include/uapi/asm/kvm.h
> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> @@ -391,6 +391,8 @@ struct kvm_sync_regs {
>   #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
>   #define KVM_STATE_NESTED_EVMCS		0x00000004
>   #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
> +/* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
> +#define KVM_STATE_NESTED_PREEMPTION_TIMER	0x00000010
>   
>   #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
>   #define KVM_STATE_NESTED_SMM_VMXON	0x00000002

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] KVM: selftests: VMX preemption timer migration test
  2020-05-18 20:16 ` [PATCH 2/2] KVM: selftests: VMX preemption timer migration test Makarand Sonare
  2020-05-19  0:56   ` Vitaly Kuznetsov
@ 2020-05-20  2:14   ` Krish Sadhukhan
  2020-05-20 23:18     ` Makarand Sonare
  1 sibling, 1 reply; 8+ messages in thread
From: Krish Sadhukhan @ 2020-05-20  2:14 UTC (permalink / raw)
  To: Makarand Sonare, kvm, pshier, jmattson


On 5/18/20 1:16 PM, Makarand Sonare wrote:
> When a nested VM with a VMX-preemption timer is migrated, verify that the
> nested VM and its parent VM observe the VMX-preemption timer exit close to
> the original expiration deadline.
>
> Signed-off-by: Makarand Sonare <makarandsonare@google.com>
> Reviewed-by: Jim Mattson <jmattson@google.com>
> Change-Id: Ia79337c682ee161399525edc34201fad473fc190
> ---
>   tools/arch/x86/include/uapi/asm/kvm.h         |   1 +
>   tools/testing/selftests/kvm/.gitignore        |   1 +
>   tools/testing/selftests/kvm/Makefile          |   1 +
>   .../testing/selftests/kvm/include/kvm_util.h  |   2 +
>   .../selftests/kvm/include/x86_64/processor.h  |  11 +-
>   .../selftests/kvm/include/x86_64/vmx.h        |  27 ++
>   .../kvm/x86_64/vmx_preemption_timer_test.c    | 256 ++++++++++++++++++
>   7 files changed, 295 insertions(+), 4 deletions(-)
>   create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
>
> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> index 60701178b9cc1..13dca545554dc 100644
> --- a/tools/arch/x86/include/uapi/asm/kvm.h
> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> @@ -402,6 +402,7 @@ struct kvm_sync_regs {
>   struct kvm_vmx_nested_state_data {
>   	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>   	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> +	__u64 preemption_timer_deadline;
>   };
>   
>   struct kvm_vmx_nested_state_hdr {
> diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
> index 222e50104296a..f159718f90c0a 100644
> --- a/tools/testing/selftests/kvm/.gitignore
> +++ b/tools/testing/selftests/kvm/.gitignore
> @@ -10,6 +10,7 @@
>   /x86_64/set_sregs_test
>   /x86_64/smm_test
>   /x86_64/state_test
> +/x86_64/vmx_preemption_timer_test
>   /x86_64/svm_vmcall_test
>   /x86_64/sync_regs_test
>   /x86_64/vmx_close_while_nested_test
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index c66f4eec34111..780f0c189a7bc 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -20,6 +20,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
>   TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
>   TEST_GEN_PROGS_x86_64 += x86_64/smm_test
>   TEST_GEN_PROGS_x86_64 += x86_64/state_test
> +TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
>   TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
>   TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
>   TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
> index e244c6ecfc1d5..919e161dd2893 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
> @@ -314,6 +314,8 @@ void ucall_uninit(struct kvm_vm *vm);
>   void ucall(uint64_t cmd, int nargs, ...);
>   uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
>   
> +#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
> +				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
>   #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
>   #define GUEST_DONE()		ucall(UCALL_DONE, 0)
>   #define __GUEST_ASSERT(_condition, _nargs, _args...) do {	\
> diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
> index 7428513a4c687..7cb19eca6c72d 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/processor.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
> @@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct desc64 *desc)
>   static inline uint64_t rdtsc(void)
>   {
>   	uint32_t eax, edx;
> -
> +	uint32_t tsc_val;
>   	/*
>   	 * The lfence is to wait (on Intel CPUs) until all previous
> -	 * instructions have been executed.
> +	 * instructions have been executed. If software requires RDTSC to be
> +	 * executed prior to execution of any subsequent instruction, it can
> +	 * execute LFENCE immediately after RDTSC
>   	 */
> -	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
> -	return ((uint64_t)edx) << 32 | eax;
> +	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
> +	tsc_val = ((uint64_t)edx) << 32 | eax;
> +	return tsc_val;
>   }
>   
>   static inline uint64_t rdtscp(uint32_t *aux)
> diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
> index 3d27069b9ed9c..ccff3e6e27048 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
> @@ -575,6 +575,33 @@ struct vmx_pages {
>   	void *eptp;
>   };
>   
> +union vmx_basic {
> +	u64 val;
> +	struct {
> +		u32 revision;
> +		u32	size:13,
> +			reserved1:3,
> +			width:1,
> +			dual:1,
> +			type:4,
> +			insouts:1,
> +			ctrl:1,
> +			vm_entry_exception_ctrl:1,
> +			reserved2:7;
> +	};
> +};
> +
> +union vmx_ctrl_msr {
> +	u64 val;
> +	struct {
> +		u32 set, clr;
> +	};
> +};
> +
> +union vmx_basic basic;
> +union vmx_ctrl_msr ctrl_pin_rev;
> +union vmx_ctrl_msr ctrl_exit_rev;
> +
>   struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
>   bool prepare_for_vmx_operation(struct vmx_pages *vmx);
>   void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
> diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
> new file mode 100644
> index 0000000000000..10893b11511be
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
> @@ -0,0 +1,256 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * VMX-preemption timer test
> + *
> + * Copyright (C) 2020, Google, LLC.
> + *
> + * Test to ensure the VM-Enter after migration doesn't
> + * incorrectly restarts the timer with the full timer
> + * value instead of partially decayed timer value
> + *
> + */
> +#define _GNU_SOURCE /* for program_invocation_short_name */
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/ioctl.h>
> +
> +#include "test_util.h"
> +
> +#include "kvm_util.h"
> +#include "processor.h"
> +#include "vmx.h"
> +
> +#define VCPU_ID		5
> +#define PREEMPTION_TIMER_VALUE			100000000ull
> +#define PREEMPTION_TIMER_VALUE_THRESHOLD1	 80000000ull
> +
> +u32 vmx_pt_rate;
> +bool l2_save_restore_done;
> +static u64 l2_vmx_pt_start;
> +volatile u64 l2_vmx_pt_finish;
> +
> +void l2_guest_code(void)
> +{
> +	u64 vmx_pt_delta;
> +
> +	vmcall();
> +	l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
> +
> +	//
> +	// Wait until the 1st threshold has passed
> +	//
> +	do {
> +		l2_vmx_pt_finish = rdtsc();
> +		vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
> +				vmx_pt_rate;
> +	} while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
> +
> +	//
> +	// Force L2 through Save and Restore cycle
> +	//
> +	GUEST_SYNC(1);
> +
> +	l2_save_restore_done = 1;
> +
> +	//
> +	// Now wait for the preemption timer to fire and
> +	// exit to L1
> +	//
> +	while ((l2_vmx_pt_finish = rdtsc()))
> +		;
> +}
> +
> +void l1_guest_code(struct vmx_pages *vmx_pages)
> +{
> +#define L2_GUEST_STACK_SIZE 64
> +	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
> +	u64 l1_vmx_pt_start;
> +	u64 l1_vmx_pt_finish;
> +	u64 l1_tsc_deadline, l2_tsc_deadline;
> +
> +	GUEST_ASSERT(vmx_pages->vmcs_gpa);
> +	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
> +	GUEST_ASSERT(load_vmcs(vmx_pages));
> +	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
> +
> +	prepare_vmcs(vmx_pages, l2_guest_code,
> +		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
> +
> +	GUEST_ASSERT(!vmlaunch());
> +	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
> +	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);


Please use vmcs_read(VM_EXIT_INSTRUCTION_LEN) instead of 3.

> +
> +	//
> +	// Check for Preemption timer support and turn on PIN control
> +	//
> +	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> +	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
> +			: MSR_IA32_VMX_PINBASED_CTLS);
> +	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
> +			: MSR_IA32_VMX_EXIT_CTLS);
> +
> +	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
> +	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> +		return;
> +


Why not do this check in the beginning of L1, before VMLAUNCH of L2 ? If 
these two controls are not supported, the test is just void.

> +	GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
> +			      vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
> +			      PIN_BASED_VMX_PREEMPTION_TIMER));
> +
> +	GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
> +			      PREEMPTION_TIMER_VALUE));
> +
> +	vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
> +
> +	l2_save_restore_done = 0;
> +
> +	l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
> +
> +	GUEST_ASSERT(!vmresume());
> +
> +	l1_vmx_pt_finish = rdtsc();
> +
> +	//
> +	// Ensure exit from L2 happens after L2 goes through
> +	// save and restore
> +	GUEST_ASSERT(l2_save_restore_done);
> +
> +	//
> +	// Ensure the exit from L2 is due to preemption timer expiry
> +	//
> +	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);


I am wondering if checking just the EXIT reason is enough to determine 
that L2 has gone through a successful migration.

> +
> +	l1_tsc_deadline = l1_vmx_pt_start +
> +		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
> +
> +	l2_tsc_deadline = l2_vmx_pt_start +
> +		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
> +
> +	//
> +	// Sync with the host and pass the l1|l2 pt_expiry_finish times and
> +	// tsc deadlines so that host can verify they are as expected
> +	//
> +	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
> +		l2_vmx_pt_finish, l2_tsc_deadline);
> +}
> +
> +void guest_code(struct vmx_pages *vmx_pages)
> +{
> +	if (vmx_pages)
> +		l1_guest_code(vmx_pages);
> +
> +	GUEST_DONE();
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	vm_vaddr_t vmx_pages_gva = 0;
> +
> +	struct kvm_regs regs1, regs2;
> +	struct kvm_vm *vm;
> +	struct kvm_run *run;
> +	struct kvm_x86_state *state;
> +	struct ucall uc;
> +	int stage;
> +
> +	/*
> +	 * AMD currently does not implement any VMX features, so for now we
> +	 * just early out.
> +	 */
> +	nested_vmx_check_supported();
> +
> +	/* Create VM */
> +	vm = vm_create_default(VCPU_ID, 0, guest_code);
> +	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
> +	run = vcpu_state(vm, VCPU_ID);
> +
> +	vcpu_regs_get(vm, VCPU_ID, &regs1);
> +
> +	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
> +		vcpu_alloc_vmx(vm, &vmx_pages_gva);
> +		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
> +	} else {
> +		pr_info("will skip nested state checks\n");


This error message looks odd. Shouldn't it say something like,

     "nested state capability not available, skipping test"

> +		goto done;
> +	}
> +
> +	for (stage = 1;; stage++) {
> +		_vcpu_run(vm, VCPU_ID);
> +		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
> +			    "Stage %d: unexpected exit reason: %u (%s),\n",
> +			    stage, run->exit_reason,
> +			    exit_reason_str(run->exit_reason));
> +
> +		switch (get_ucall(vm, VCPU_ID, &uc)) {
> +		case UCALL_ABORT:
> +			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
> +				  __FILE__, uc.args[1]);
> +			/* NOT REACHED */
> +		case UCALL_SYNC:
> +			break;
> +		case UCALL_DONE:
> +			goto done;
> +		default:
> +			TEST_FAIL("Unknown ucall %lu", uc.cmd);
> +		}
> +
> +		/* UCALL_SYNC is handled here.  */
> +		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
> +			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
> +			    stage, (ulong)uc.args[1]);
> +		//
> +		// If this stage 2 then we should verify the vmx pt expiry
> +		// is as expected
> +		//
> +		if (stage == 2) {
> +
> +			pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
> +				stage, uc.args[2], uc.args[3]);
> +
> +			pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
> +				stage, uc.args[4], uc.args[5]);
> +
> +			//
> +			// From L1's perspective verify Preemption timer hasn't
> +			// expired too early
> +			//
> +
> +			TEST_ASSERT(uc.args[2] >= uc.args[3],
> +				"Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
> +				stage, uc.args[2], uc.args[3]);
> +
> +			//
> +			// From L2's perspective verify Preemption timer hasn't
> +			// expired too late
> +			//
> +			TEST_ASSERT(uc.args[4] < uc.args[5],
> +				"Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
> +				stage, uc.args[4], uc.args[5]);
> +		}
> +


For readability, it's better to put a block comment here for the entire 
save/restore operation as a whole.

Also, this save/restore block should be placed before the above stage2 
block.

> +		state = vcpu_save_state(vm, VCPU_ID);
> +		memset(&regs1, 0, sizeof(regs1));
> +		vcpu_regs_get(vm, VCPU_ID, &regs1);
> +
> +		kvm_vm_release(vm);
> +
> +		/* Restore state in a new VM.  */
> +		kvm_vm_restart(vm, O_RDWR);
> +		vm_vcpu_add(vm, VCPU_ID);
> +		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
> +		vcpu_load_state(vm, VCPU_ID, state);
> +		run = vcpu_state(vm, VCPU_ID);
> +		free(state);
> +
> +		memset(&regs2, 0, sizeof(regs2));
> +		vcpu_regs_get(vm, VCPU_ID, &regs2);
> +		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
> +			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
> +			    (ulong) regs2.rdi, (ulong) regs2.rsi);
> +	}
> +
> +done:
> +	kvm_vm_free(vm);
> +}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] KVM: selftests: VMX preemption timer migration test
  2020-05-20  2:14   ` Krish Sadhukhan
@ 2020-05-20 23:18     ` Makarand Sonare
  0 siblings, 0 replies; 8+ messages in thread
From: Makarand Sonare @ 2020-05-20 23:18 UTC (permalink / raw)
  To: Krish Sadhukhan; +Cc: kvm, pshier, jmattson

> On 5/18/20 1:16 PM, Makarand Sonare wrote:
>> When a nested VM with a VMX-preemption timer is migrated, verify that the
>> nested VM and its parent VM observe the VMX-preemption timer exit close
>> to
>> the original expiration deadline.
>>
>> Signed-off-by: Makarand Sonare <makarandsonare@google.com>
>> Reviewed-by: Jim Mattson <jmattson@google.com>
>> Change-Id: Ia79337c682ee161399525edc34201fad473fc190
>> ---
>>   tools/arch/x86/include/uapi/asm/kvm.h         |   1 +
>>   tools/testing/selftests/kvm/.gitignore        |   1 +
>>   tools/testing/selftests/kvm/Makefile          |   1 +
>>   .../testing/selftests/kvm/include/kvm_util.h  |   2 +
>>   .../selftests/kvm/include/x86_64/processor.h  |  11 +-
>>   .../selftests/kvm/include/x86_64/vmx.h        |  27 ++
>>   .../kvm/x86_64/vmx_preemption_timer_test.c    | 256 ++++++++++++++++++
>>   7 files changed, 295 insertions(+), 4 deletions(-)
>>   create mode 100644
>> tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
>>
>> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h
>> b/tools/arch/x86/include/uapi/asm/kvm.h
>> index 60701178b9cc1..13dca545554dc 100644
>> --- a/tools/arch/x86/include/uapi/asm/kvm.h
>> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
>> @@ -402,6 +402,7 @@ struct kvm_sync_regs {
>>   struct kvm_vmx_nested_state_data {
>>   	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>>   	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>> +	__u64 preemption_timer_deadline;
>>   };
>>
>>   struct kvm_vmx_nested_state_hdr {
>> diff --git a/tools/testing/selftests/kvm/.gitignore
>> b/tools/testing/selftests/kvm/.gitignore
>> index 222e50104296a..f159718f90c0a 100644
>> --- a/tools/testing/selftests/kvm/.gitignore
>> +++ b/tools/testing/selftests/kvm/.gitignore
>> @@ -10,6 +10,7 @@
>>   /x86_64/set_sregs_test
>>   /x86_64/smm_test
>>   /x86_64/state_test
>> +/x86_64/vmx_preemption_timer_test
>>   /x86_64/svm_vmcall_test
>>   /x86_64/sync_regs_test
>>   /x86_64/vmx_close_while_nested_test
>> diff --git a/tools/testing/selftests/kvm/Makefile
>> b/tools/testing/selftests/kvm/Makefile
>> index c66f4eec34111..780f0c189a7bc 100644
>> --- a/tools/testing/selftests/kvm/Makefile
>> +++ b/tools/testing/selftests/kvm/Makefile
>> @@ -20,6 +20,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
>>   TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
>>   TEST_GEN_PROGS_x86_64 += x86_64/smm_test
>>   TEST_GEN_PROGS_x86_64 += x86_64/state_test
>> +TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
>>   TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
>>   TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
>>   TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
>> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h
>> b/tools/testing/selftests/kvm/include/kvm_util.h
>> index e244c6ecfc1d5..919e161dd2893 100644
>> --- a/tools/testing/selftests/kvm/include/kvm_util.h
>> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
>> @@ -314,6 +314,8 @@ void ucall_uninit(struct kvm_vm *vm);
>>   void ucall(uint64_t cmd, int nargs, ...);
>>   uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall
>> *uc);
>>
>> +#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
>> +				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
>>   #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
>>   #define GUEST_DONE()		ucall(UCALL_DONE, 0)
>>   #define __GUEST_ASSERT(_condition, _nargs, _args...) do {	\
>> diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h
>> b/tools/testing/selftests/kvm/include/x86_64/processor.h
>> index 7428513a4c687..7cb19eca6c72d 100644
>> --- a/tools/testing/selftests/kvm/include/x86_64/processor.h
>> +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
>> @@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct
>> desc64 *desc)
>>   static inline uint64_t rdtsc(void)
>>   {
>>   	uint32_t eax, edx;
>> -
>> +	uint32_t tsc_val;
>>   	/*
>>   	 * The lfence is to wait (on Intel CPUs) until all previous
>> -	 * instructions have been executed.
>> +	 * instructions have been executed. If software requires RDTSC to be
>> +	 * executed prior to execution of any subsequent instruction, it can
>> +	 * execute LFENCE immediately after RDTSC
>>   	 */
>> -	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
>> -	return ((uint64_t)edx) << 32 | eax;
>> +	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
>> +	tsc_val = ((uint64_t)edx) << 32 | eax;
>> +	return tsc_val;
>>   }
>>
>>   static inline uint64_t rdtscp(uint32_t *aux)
>> diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h
>> b/tools/testing/selftests/kvm/include/x86_64/vmx.h
>> index 3d27069b9ed9c..ccff3e6e27048 100644
>> --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
>> +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
>> @@ -575,6 +575,33 @@ struct vmx_pages {
>>   	void *eptp;
>>   };
>>
>> +union vmx_basic {
>> +	u64 val;
>> +	struct {
>> +		u32 revision;
>> +		u32	size:13,
>> +			reserved1:3,
>> +			width:1,
>> +			dual:1,
>> +			type:4,
>> +			insouts:1,
>> +			ctrl:1,
>> +			vm_entry_exception_ctrl:1,
>> +			reserved2:7;
>> +	};
>> +};
>> +
>> +union vmx_ctrl_msr {
>> +	u64 val;
>> +	struct {
>> +		u32 set, clr;
>> +	};
>> +};
>> +
>> +union vmx_basic basic;
>> +union vmx_ctrl_msr ctrl_pin_rev;
>> +union vmx_ctrl_msr ctrl_exit_rev;
>> +
>>   struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t
>> *p_vmx_gva);
>>   bool prepare_for_vmx_operation(struct vmx_pages *vmx);
>>   void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void
>> *guest_rsp);
>> diff --git
>> a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
>> b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
>> new file mode 100644
>> index 0000000000000..10893b11511be
>> --- /dev/null
>> +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
>> @@ -0,0 +1,256 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * VMX-preemption timer test
>> + *
>> + * Copyright (C) 2020, Google, LLC.
>> + *
>> + * Test to ensure the VM-Enter after migration doesn't
>> + * incorrectly restarts the timer with the full timer
>> + * value instead of partially decayed timer value
>> + *
>> + */
>> +#define _GNU_SOURCE /* for program_invocation_short_name */
>> +#include <fcntl.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <sys/ioctl.h>
>> +
>> +#include "test_util.h"
>> +
>> +#include "kvm_util.h"
>> +#include "processor.h"
>> +#include "vmx.h"
>> +
>> +#define VCPU_ID		5
>> +#define PREEMPTION_TIMER_VALUE			100000000ull
>> +#define PREEMPTION_TIMER_VALUE_THRESHOLD1	 80000000ull
>> +
>> +u32 vmx_pt_rate;
>> +bool l2_save_restore_done;
>> +static u64 l2_vmx_pt_start;
>> +volatile u64 l2_vmx_pt_finish;
>> +
>> +void l2_guest_code(void)
>> +{
>> +	u64 vmx_pt_delta;
>> +
>> +	vmcall();
>> +	l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
>> +
>> +	//
>> +	// Wait until the 1st threshold has passed
>> +	//
>> +	do {
>> +		l2_vmx_pt_finish = rdtsc();
>> +		vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
>> +				vmx_pt_rate;
>> +	} while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
>> +
>> +	//
>> +	// Force L2 through Save and Restore cycle
>> +	//
>> +	GUEST_SYNC(1);
>> +
>> +	l2_save_restore_done = 1;
>> +
>> +	//
>> +	// Now wait for the preemption timer to fire and
>> +	// exit to L1
>> +	//
>> +	while ((l2_vmx_pt_finish = rdtsc()))
>> +		;
>> +}
>> +
>> +void l1_guest_code(struct vmx_pages *vmx_pages)
>> +{
>> +#define L2_GUEST_STACK_SIZE 64
>> +	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
>> +	u64 l1_vmx_pt_start;
>> +	u64 l1_vmx_pt_finish;
>> +	u64 l1_tsc_deadline, l2_tsc_deadline;
>> +
>> +	GUEST_ASSERT(vmx_pages->vmcs_gpa);
>> +	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
>> +	GUEST_ASSERT(load_vmcs(vmx_pages));
>> +	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
>> +
>> +	prepare_vmcs(vmx_pages, l2_guest_code,
>> +		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
>> +
>> +	GUEST_ASSERT(!vmlaunch());
>> +	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
>> +	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
>
>
> Please use vmcs_read(VM_EXIT_INSTRUCTION_LEN) instead of 3.
>
>> +
>> +	//
>> +	// Check for Preemption timer support and turn on PIN control
>> +	//
>> +	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
>> +	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
>> +			: MSR_IA32_VMX_PINBASED_CTLS);
>> +	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
>> +			: MSR_IA32_VMX_EXIT_CTLS);
>> +
>> +	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
>> +	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
>> +		return;
>> +
>
>
> Why not do this check in the beginning of L1, before VMLAUNCH of L2 ? If
> these two controls are not supported, the test is just void.
>
>> +	GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
>> +			      vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
>> +			      PIN_BASED_VMX_PREEMPTION_TIMER));
>> +
>> +	GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
>> +			      PREEMPTION_TIMER_VALUE));
>> +
>> +	vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
>> +
>> +	l2_save_restore_done = 0;
>> +
>> +	l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
>> +
>> +	GUEST_ASSERT(!vmresume());
>> +
>> +	l1_vmx_pt_finish = rdtsc();
>> +
>> +	//
>> +	// Ensure exit from L2 happens after L2 goes through
>> +	// save and restore
>> +	GUEST_ASSERT(l2_save_restore_done);
>> +
>> +	//
>> +	// Ensure the exit from L2 is due to preemption timer expiry
>> +	//
>> +	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
>
>
> I am wondering if checking just the EXIT reason is enough to determine
> that L2 has gone through a successful migration.
>
Yes. But I found it hugely helpful to pass the date to L0 for logging
in case of failures.

>> +
>> +	l1_tsc_deadline = l1_vmx_pt_start +
>> +		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
>> +
>> +	l2_tsc_deadline = l2_vmx_pt_start +
>> +		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
>> +
>> +	//
>> +	// Sync with the host and pass the l1|l2 pt_expiry_finish times and
>> +	// tsc deadlines so that host can verify they are as expected
>> +	//
>> +	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
>> +		l2_vmx_pt_finish, l2_tsc_deadline);
>> +}
>> +
>> +void guest_code(struct vmx_pages *vmx_pages)
>> +{
>> +	if (vmx_pages)
>> +		l1_guest_code(vmx_pages);
>> +
>> +	GUEST_DONE();
>> +}
>> +
>> +int main(int argc, char *argv[])
>> +{
>> +	vm_vaddr_t vmx_pages_gva = 0;
>> +
>> +	struct kvm_regs regs1, regs2;
>> +	struct kvm_vm *vm;
>> +	struct kvm_run *run;
>> +	struct kvm_x86_state *state;
>> +	struct ucall uc;
>> +	int stage;
>> +
>> +	/*
>> +	 * AMD currently does not implement any VMX features, so for now we
>> +	 * just early out.
>> +	 */
>> +	nested_vmx_check_supported();
>> +
>> +	/* Create VM */
>> +	vm = vm_create_default(VCPU_ID, 0, guest_code);
>> +	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
>> +	run = vcpu_state(vm, VCPU_ID);
>> +
>> +	vcpu_regs_get(vm, VCPU_ID, &regs1);
>> +
>> +	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
>> +		vcpu_alloc_vmx(vm, &vmx_pages_gva);
>> +		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
>> +	} else {
>> +		pr_info("will skip nested state checks\n");
>
>
> This error message looks odd. Shouldn't it say something like,
>
>      "nested state capability not available, skipping test"
>
>> +		goto done;
>> +	}
>> +
>> +	for (stage = 1;; stage++) {
>> +		_vcpu_run(vm, VCPU_ID);
>> +		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
>> +			    "Stage %d: unexpected exit reason: %u (%s),\n",
>> +			    stage, run->exit_reason,
>> +			    exit_reason_str(run->exit_reason));
>> +
>> +		switch (get_ucall(vm, VCPU_ID, &uc)) {
>> +		case UCALL_ABORT:
>> +			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
>> +				  __FILE__, uc.args[1]);
>> +			/* NOT REACHED */
>> +		case UCALL_SYNC:
>> +			break;
>> +		case UCALL_DONE:
>> +			goto done;
>> +		default:
>> +			TEST_FAIL("Unknown ucall %lu", uc.cmd);
>> +		}
>> +
>> +		/* UCALL_SYNC is handled here.  */
>> +		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
>> +			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit,
>> got %lx",
>> +			    stage, (ulong)uc.args[1]);
>> +		//
>> +		// If this stage 2 then we should verify the vmx pt expiry
>> +		// is as expected
>> +		//
>> +		if (stage == 2) {
>> +
>> +			pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
>> +				stage, uc.args[2], uc.args[3]);
>> +
>> +			pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
>> +				stage, uc.args[4], uc.args[5]);
>> +
>> +			//
>> +			// From L1's perspective verify Preemption timer hasn't
>> +			// expired too early
>> +			//
>> +
>> +			TEST_ASSERT(uc.args[2] >= uc.args[3],
>> +				"Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
>> +				stage, uc.args[2], uc.args[3]);
>> +
>> +			//
>> +			// From L2's perspective verify Preemption timer hasn't
>> +			// expired too late
>> +			//
>> +			TEST_ASSERT(uc.args[4] < uc.args[5],
>> +				"Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
>> +				stage, uc.args[4], uc.args[5]);
>> +		}
>> +
>
>
> For readability, it's better to put a block comment here for the entire
> save/restore operation as a whole.
>
> Also, this save/restore block should be placed before the above stage2
> block.
>
>> +		state = vcpu_save_state(vm, VCPU_ID);
>> +		memset(&regs1, 0, sizeof(regs1));
>> +		vcpu_regs_get(vm, VCPU_ID, &regs1);
>> +
>> +		kvm_vm_release(vm);
>> +
>> +		/* Restore state in a new VM.  */
>> +		kvm_vm_restart(vm, O_RDWR);
>> +		vm_vcpu_add(vm, VCPU_ID);
>> +		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
>> +		vcpu_load_state(vm, VCPU_ID, state);
>> +		run = vcpu_state(vm, VCPU_ID);
>> +		free(state);
>> +
>> +		memset(&regs2, 0, sizeof(regs2));
>> +		vcpu_regs_get(vm, VCPU_ID, &regs2);
>> +		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
>> +			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi:
>> %lx",
>> +			    (ulong) regs2.rdi, (ulong) regs2.rsi);
>> +	}
>> +
>> +done:
>> +	kvm_vm_free(vm);
>> +}
>

-- 
Thanks,
Makarand Sonare

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-05-20 23:18 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-18 20:15 [PATCH 0/2] Fix VMX preemption timer migration Makarand Sonare
2020-05-18 20:15 ` [PATCH 1/2] KVM: nVMX: " Makarand Sonare
2020-05-19  0:49   ` Vitaly Kuznetsov
2020-05-19  7:18   ` Krish Sadhukhan
2020-05-18 20:16 ` [PATCH 2/2] KVM: selftests: VMX preemption timer migration test Makarand Sonare
2020-05-19  0:56   ` Vitaly Kuznetsov
2020-05-20  2:14   ` Krish Sadhukhan
2020-05-20 23:18     ` Makarand Sonare

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.