* [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests
@ 2022-10-20 15:23 Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow Maxim Levitsky
                   ` (15 more replies)
  0 siblings, 16 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

This is a set of fixes and new unit tests that I developed for the
KVM unit tests.

I also did some work to separate the SVM code into a minimal
support library so that it can be used from an arbitrary test.

Best regards,
	Maxim Levitsky

Maxim Levitsky (16):
  x86: make irq_enable avoid the interrupt shadow
  x86: add few helper functions for apic local timer
  svm: use irq_enable instead of sti/nop
  svm: make svm_intr_intercept_mix_if/gif test a bit more robust
  svm: use apic_start_timer/apic_stop_timer instead of open coding it
  x86: Add test for #SMI during interrupt window
  x86: Add a simple test for SYSENTER instruction.
  svm: add nested shutdown test.
  svm: move svm spec definitions to lib/x86/svm.h
  svm: move some svm support functions into lib/x86/svm_lib.h
  svm: add svm_supported
  svm: move setup_svm to svm_lib.c
  svm: move vmcb_ident to svm_lib.c
  svm: rewrite vm entry macros
  svm: introduce svm_vcpu
  add IPI loss stress test

 lib/x86/apic.c            |  37 ++
 lib/x86/apic.h            |   6 +
 lib/x86/processor.h       |   9 +-
 lib/x86/svm.h             | 366 +++++++++++++++++++
 lib/x86/svm_lib.c         | 168 +++++++++
 lib/x86/svm_lib.h         | 142 ++++++++
 x86/Makefile.common       |   4 +-
 x86/Makefile.x86_64       |   5 +
 x86/apic.c                |   1 -
 x86/ioapic.c              |   1 -
 x86/ipi_stress.c          | 235 +++++++++++++
 x86/smm_int_window.c      | 125 +++++++
 x86/svm.c                 | 258 ++------------
 x86/svm.h                 | 453 +-----------------------
 x86/svm_npt.c             |  45 +--
 x86/svm_tests.c           | 724 ++++++++++++++++++++------------------
 x86/sysenter.c            | 127 +++++++
 x86/tscdeadline_latency.c |   1 -
 x86/unittests.cfg         |  15 +
 x86/vmx_tests.c           |   7 -
 20 files changed, 1669 insertions(+), 1060 deletions(-)
 create mode 100644 lib/x86/svm.h
 create mode 100644 lib/x86/svm_lib.c
 create mode 100644 lib/x86/svm_lib.h
 create mode 100644 x86/ipi_stress.c
 create mode 100644 x86/smm_int_window.c
 create mode 100644 x86/sysenter.c

-- 
2.26.3




* [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 18:01   ` Sean Christopherson
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer Maxim Levitsky
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

Tests that need an interrupt shadow can't rely on the irq_enable() function
anyway, as its comment states, and it is useful to know for sure that
interrupts are enabled after the call to this function.
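
For illustration, a test that really does depend on the one-instruction STI
shadow should keep "sti" and the shadowed instruction in a single asm
statement (a sketch, not part of this patch; the helper name is made up):

	static inline void sti_with_shadowed_nop(void)
	{
		/* the nop executes inside the STI interrupt shadow */
		asm volatile("sti; nop" ::: "memory");
	}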

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/processor.h       | 9 ++++-----
 x86/apic.c                | 1 -
 x86/ioapic.c              | 1 -
 x86/svm_tests.c           | 9 ---------
 x86/tscdeadline_latency.c | 1 -
 x86/vmx_tests.c           | 7 -------
 6 files changed, 4 insertions(+), 24 deletions(-)

diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 03242206..9db07346 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -720,13 +720,12 @@ static inline void irq_disable(void)
 	asm volatile("cli");
 }
 
-/* Note that irq_enable() does not ensure an interrupt shadow due
- * to the vagaries of compiler optimizations.  If you need the
- * shadow, use a single asm with "sti" and the instruction after it.
- */
 static inline void irq_enable(void)
 {
-	asm volatile("sti");
+	asm volatile(
+			"sti \n\t"
+			"nop\n\t"
+	);
 }
 
 static inline void invlpg(volatile void *va)
diff --git a/x86/apic.c b/x86/apic.c
index 23508ad5..a8964d88 100644
--- a/x86/apic.c
+++ b/x86/apic.c
@@ -36,7 +36,6 @@ static void __test_tsc_deadline_timer(void)
     irq_enable();
 
     wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC));
-    asm volatile ("nop");
     report(tdt_count == 1, "tsc deadline timer");
     report(rdmsr(MSR_IA32_TSCDEADLINE) == 0, "tsc deadline timer clearing");
 }
diff --git a/x86/ioapic.c b/x86/ioapic.c
index 4f578ce4..2e460a6d 100644
--- a/x86/ioapic.c
+++ b/x86/ioapic.c
@@ -129,7 +129,6 @@ static void test_ioapic_simultaneous(void)
 	toggle_irq_line(0x0f);
 	toggle_irq_line(0x0e);
 	irq_enable();
-	asm volatile ("nop");
 	report(g_66 && g_78 && g_66_after_78 && g_66_rip == g_78_rip,
 	       "ioapic simultaneous edge interrupts");
 }
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index e2ec9541..a6397821 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -1000,7 +1000,6 @@ static bool pending_event_finished(struct svm_test *test)
 		}
 
 		irq_enable();
-		asm volatile ("nop");
 		irq_disable();
 
 		if (!pending_event_ipi_fired) {
@@ -1056,7 +1055,6 @@ static void pending_event_cli_test(struct svm_test *test)
 
 	/* VINTR_MASKING is zero.  This should cause the IPI to fire.  */
 	irq_enable();
-	asm volatile ("nop");
 	irq_disable();
 
 	if (pending_event_ipi_fired != true) {
@@ -1072,7 +1070,6 @@ static void pending_event_cli_test(struct svm_test *test)
 	 * that L0 did not leave a stale VINTR in the VMCB.
 	 */
 	irq_enable();
-	asm volatile ("nop");
 	irq_disable();
 }
 
@@ -1105,7 +1102,6 @@ static bool pending_event_cli_finished(struct svm_test *test)
 		}
 
 		irq_enable();
-		asm volatile ("nop");
 		irq_disable();
 
 		if (pending_event_ipi_fired != true) {
@@ -1243,7 +1239,6 @@ static bool interrupt_finished(struct svm_test *test)
 		}
 
 		irq_enable();
-		asm volatile ("nop");
 		irq_disable();
 
 		vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR);
@@ -1540,7 +1535,6 @@ static void virq_inject_test(struct svm_test *test)
 	}
 
 	irq_enable();
-	asm volatile ("nop");
 	irq_disable();
 
 	if (!virq_fired) {
@@ -1557,7 +1551,6 @@ static void virq_inject_test(struct svm_test *test)
 	}
 
 	irq_enable();
-	asm volatile ("nop");
 	irq_disable();
 
 	if (!virq_fired) {
@@ -1568,7 +1561,6 @@ static void virq_inject_test(struct svm_test *test)
 	vmmcall();
 
 	irq_enable();
-	asm volatile ("nop");
 	irq_disable();
 
 	if (virq_fired) {
@@ -1739,7 +1731,6 @@ static bool reg_corruption_finished(struct svm_test *test)
 		void* guest_rip = (void*)vmcb->save.rip;
 
 		irq_enable();
-		asm volatile ("nop");
 		irq_disable();
 
 		if (guest_rip == insb_instruction_label && io_port_var != 0xAA) {
diff --git a/x86/tscdeadline_latency.c b/x86/tscdeadline_latency.c
index a3bc4ea4..c54530dd 100644
--- a/x86/tscdeadline_latency.c
+++ b/x86/tscdeadline_latency.c
@@ -73,7 +73,6 @@ static void start_tsc_deadline_timer(void)
     irq_enable();
 
     wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC)+delta);
-    asm volatile ("nop");
 }
 
 static int enable_tsc_deadline_timer(void)
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index aa2ecbbc..c8e68931 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -1625,7 +1625,6 @@ static void interrupt_main(void)
 	apic_write(APIC_TMICT, 1000000);
 
 	irq_enable();
-	asm volatile ("nop");
 	vmcall();
 
 	report(rdtsc() - start > 10000 && timer_fired,
@@ -1639,7 +1638,6 @@ static void interrupt_main(void)
 	apic_write(APIC_TMICT, 1000000);
 
 	irq_enable();
-	asm volatile ("nop");
 	vmcall();
 
 	report(rdtsc() - start > 10000 && timer_fired,
@@ -1709,7 +1707,6 @@ static int interrupt_exit_handler(union exit_reason exit_reason)
 			handle_external_interrupt(vector);
 		} else {
 			irq_enable();
-			asm volatile ("nop");
 			irq_disable();
 		}
 		if (vmx_get_test_stage() >= 2)
@@ -6792,7 +6789,6 @@ static void test_x2apic_wr(
 
 		/* Clear the external interrupt. */
 		irq_enable();
-		asm volatile ("nop");
 		irq_disable();
 		report(handle_x2apic_ipi_ran,
 		       "Got pending interrupt after IRQ enabled.");
@@ -8543,7 +8539,6 @@ static void vmx_pending_event_test_core(bool guest_hlt)
 	       "Guest did not run before host received IPI");
 
 	irq_enable();
-	asm volatile ("nop");
 	irq_disable();
 	report(vmx_pending_event_ipi_fired,
 	       "Got pending interrupt after IRQ enabled");
@@ -9526,8 +9521,6 @@ static void vmx_hlt_with_rvi_guest(void)
 	handle_irq(HLT_WITH_RVI_VECTOR, vmx_hlt_with_rvi_guest_isr);
 
 	irq_enable();
-	asm volatile ("nop");
-
 	vmcall();
 }
 
-- 
2.26.3



* [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 19:14   ` Sean Christopherson
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 03/16] svm: use irq_enable instead of sti/nop Maxim Levitsky
                   ` (13 subsequent siblings)
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

Add a few functions to apic.c to make it easier to enable and disable
the local APIC timer.
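
For example, a subtest could use the new helpers roughly as follows (a
sketch based on the hunks below; it assumes an ISR for TIMER_VECTOR was
already registered with handle_irq()):

	apic_setup_timer(TIMER_VECTOR, false);	/* one-shot mode, divider 1 */
	irq_enable();
	apic_start_timer(1000000);		/* arm TMICT */
	/* ... wait for the timer interrupt to fire ... */
	apic_stop_timer();			/* TMICT = 0 */
	apic_cleanup_timer();			/* mask LVTT, drain a pending interrupt */
	irq_disable();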

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/apic.c | 37 +++++++++++++++++++++++++++++++++++++
 lib/x86/apic.h |  6 ++++++
 2 files changed, 43 insertions(+)

diff --git a/lib/x86/apic.c b/lib/x86/apic.c
index 5131525a..dc6d3862 100644
--- a/lib/x86/apic.c
+++ b/lib/x86/apic.c
@@ -256,3 +256,40 @@ void init_apic_map(void)
 			id_map[j++] = i;
 	}
 }
+
+void apic_setup_timer(int vector, bool periodic)
+{
+	/* APIC runs with 'CPU core clock' divided by value in APIC_TDCR */
+
+	u32 lvtt = vector |
+			(periodic ? APIC_LVT_TIMER_PERIODIC : APIC_LVT_TIMER_ONESHOT);
+
+	apic_cleanup_timer();
+	apic_write(APIC_TDCR, APIC_TDR_DIV_1);
+	apic_write(APIC_LVTT, lvtt);
+}
+
+void apic_start_timer(u32 counter)
+{
+	apic_write(APIC_TMICT, counter);
+}
+
+void apic_stop_timer(void)
+{
+	apic_write(APIC_TMICT, 0);
+}
+
+void apic_cleanup_timer(void)
+{
+	u32 lvtt = apic_read(APIC_LVTT);
+
+	// stop the counter
+	apic_stop_timer();
+
+	// mask the timer interrupt
+	apic_write(APIC_LVTT, lvtt | APIC_LVT_MASKED);
+
+	// ensure that a pending timer is serviced
+	irq_enable();
+	irq_disable();
+}
diff --git a/lib/x86/apic.h b/lib/x86/apic.h
index 6d27f047..db691e2a 100644
--- a/lib/x86/apic.h
+++ b/lib/x86/apic.h
@@ -58,6 +58,12 @@ void disable_apic(void);
 void reset_apic(void);
 void init_apic_map(void);
 
+void apic_cleanup_timer(void);
+void apic_setup_timer(int vector, bool periodic);
+
+void apic_start_timer(u32 counter);
+void apic_stop_timer(void);
+
 /* Converts byte-addressable APIC register offset to 4-byte offset. */
 static inline u32 apic_reg_index(u32 reg)
 {
-- 
2.26.3



* [kvm-unit-tests PATCH 03/16] svm: use irq_enable instead of sti/nop
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 04/16] svm: make svm_intr_intercept_mix_if/gif test a bit more robust Maxim Levitsky
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

Use irq_enable() instead of the open-coded sti;nop. Also, while at it,
remove the nop after stgi/clgi - these instructions don't create an
interrupt shadow.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 x86/svm_tests.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index a6397821..a6b26e72 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -3141,8 +3141,7 @@ static void svm_intr_intercept_mix_if_guest(struct svm_test *test)
 {
 	asm volatile("nop;nop;nop;nop");
 	report(!dummy_isr_recevied, "No interrupt expected");
-	sti();
-	asm volatile("nop");
+	irq_enable();
 	report(0, "must not reach here");
 }
 
@@ -3172,12 +3171,10 @@ static void svm_intr_intercept_mix_gif_guest(struct svm_test *test)
 	// clear GIF and enable IF
 	// that should still not cause VM exit
 	clgi();
-	sti();
-	asm volatile("nop");
+	irq_enable();
 	report(!dummy_isr_recevied, "No interrupt expected");
 
 	stgi();
-	asm volatile("nop");
 	report(0, "must not reach here");
 }
 
@@ -3207,7 +3204,6 @@ static void svm_intr_intercept_mix_gif_guest2(struct svm_test *test)
 	report(!dummy_isr_recevied, "No interrupt expected");
 
 	stgi();
-	asm volatile("nop");
 	report(0, "must not reach here");
 }
 
@@ -3232,14 +3228,11 @@ static void svm_intr_intercept_mix_nmi_guest(struct svm_test *test)
 	cli(); // should have no effect
 
 	clgi();
-	asm volatile("nop");
 	apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_NMI, 0);
-	sti(); // should have no effect
-	asm volatile("nop");
+	irq_enable();
 	report(!nmi_recevied, "No NMI expected");
 
 	stgi();
-	asm volatile("nop");
 	report(0, "must not reach here");
 }
 
@@ -3263,12 +3256,9 @@ static void svm_intr_intercept_mix_smi_guest(struct svm_test *test)
 	asm volatile("nop;nop;nop;nop");
 
 	clgi();
-	asm volatile("nop");
 	apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_SMI, 0);
-	sti(); // should have no effect
-	asm volatile("nop");
+	irq_enable();
 	stgi();
-	asm volatile("nop");
 	report(0, "must not reach here");
 }
 
-- 
2.26.3



* [kvm-unit-tests PATCH 04/16] svm: make svm_intr_intercept_mix_if/gif test a bit more robust
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (2 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 03/16] svm: use irq_enable instead of sti/nop Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 05/16] svm: use apic_start_timer/apic_stop_timer instead of open coding it Maxim Levitsky
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

When injecting a self-IPI, the test assumes that the initial value of
EFLAGS.IF is zero, but previous tests might have set it.

Explicitly disable interrupts to avoid relying on this assumption.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 x86/svm_tests.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index a6b26e72..d734e5f7 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -3155,6 +3155,7 @@ static void svm_intr_intercept_mix_if(void)
 	vmcb->save.rflags &= ~X86_EFLAGS_IF;
 
 	test_set_guest(svm_intr_intercept_mix_if_guest);
+	irq_disable();
 	apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | 0x55, 0);
 	svm_intr_intercept_mix_run_guest(&dummy_isr_recevied, SVM_EXIT_INTR);
 }
@@ -3187,6 +3188,7 @@ static void svm_intr_intercept_mix_gif(void)
 	vmcb->save.rflags &= ~X86_EFLAGS_IF;
 
 	test_set_guest(svm_intr_intercept_mix_gif_guest);
+	irq_disable();
 	apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | 0x55, 0);
 	svm_intr_intercept_mix_run_guest(&dummy_isr_recevied, SVM_EXIT_INTR);
 }
-- 
2.26.3



* [kvm-unit-tests PATCH 05/16] svm: use apic_start_timer/apic_stop_timer instead of open coding it
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (3 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 04/16] svm: make svm_intr_intercept_mix_if/gif test a bit more robust Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 06/16] x86: Add test for #SMI during interrupt window Maxim Levitsky
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

This simplifies the code and ensures that after a subtest has used the APIC
timer, it won't affect the subtests that run after it.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 x86/svm_tests.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index d734e5f7..19b35e95 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -1147,9 +1147,10 @@ static void interrupt_test(struct svm_test *test)
 {
 	long long start, loops;
 
-	apic_write(APIC_LVTT, TIMER_VECTOR);
+	apic_setup_timer(TIMER_VECTOR, false);
+
 	irq_enable();
-	apic_write(APIC_TMICT, 1); //Timer Initial Count Register 0x380 one-shot
+	apic_start_timer(1);
 	for (loops = 0; loops < 10000000 && !timer_fired; loops++)
 		asm volatile ("nop");
 
@@ -1160,12 +1161,12 @@ static void interrupt_test(struct svm_test *test)
 		vmmcall();
 	}
 
-	apic_write(APIC_TMICT, 0);
+	apic_stop_timer();
 	irq_disable();
 	vmmcall();
 
 	timer_fired = false;
-	apic_write(APIC_TMICT, 1);
+	apic_start_timer(1);
 	for (loops = 0; loops < 10000000 && !timer_fired; loops++)
 		asm volatile ("nop");
 
@@ -1177,12 +1178,12 @@ static void interrupt_test(struct svm_test *test)
 	}
 
 	irq_enable();
-	apic_write(APIC_TMICT, 0);
+	apic_stop_timer();
 	irq_disable();
 
 	timer_fired = false;
 	start = rdtsc();
-	apic_write(APIC_TMICT, 1000000);
+	apic_start_timer(1000000);
 	safe_halt();
 
 	report(rdtsc() - start > 10000 && timer_fired,
@@ -1193,13 +1194,13 @@ static void interrupt_test(struct svm_test *test)
 		vmmcall();
 	}
 
-	apic_write(APIC_TMICT, 0);
+	apic_stop_timer();
 	irq_disable();
 	vmmcall();
 
 	timer_fired = false;
 	start = rdtsc();
-	apic_write(APIC_TMICT, 1000000);
+	apic_start_timer(1000000);
 	asm volatile ("hlt");
 
 	report(rdtsc() - start > 10000 && timer_fired,
@@ -1210,7 +1211,7 @@ static void interrupt_test(struct svm_test *test)
 		vmmcall();
 	}
 
-	apic_write(APIC_TMICT, 0);
+	apic_cleanup_timer();
 	irq_disable();
 }
 
@@ -1693,10 +1694,8 @@ static void reg_corruption_prepare(struct svm_test *test)
 	handle_irq(TIMER_VECTOR, reg_corruption_isr);
 
 	/* set local APIC to inject external interrupts */
-	apic_write(APIC_TMICT, 0);
-	apic_write(APIC_TDCR, 0);
-	apic_write(APIC_LVTT, TIMER_VECTOR | APIC_LVT_TIMER_PERIODIC);
-	apic_write(APIC_TMICT, 1000);
+	apic_setup_timer(TIMER_VECTOR, true);
+	apic_start_timer(1000);
 }
 
 static void reg_corruption_test(struct svm_test *test)
@@ -1742,8 +1741,7 @@ static bool reg_corruption_finished(struct svm_test *test)
 	}
 	return false;
 cleanup:
-	apic_write(APIC_LVTT, APIC_LVT_TIMER_MASK);
-	apic_write(APIC_TMICT, 0);
+	apic_cleanup_timer();
 	return true;
 
 }
-- 
2.26.3



* [kvm-unit-tests PATCH 06/16] x86: Add test for #SMI during interrupt window
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (4 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 05/16] svm: use apic_start_timer/apic_stop_timer instead of open coding it Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 07/16] x86: Add a simple test for SYSENTER instruction Maxim Levitsky
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

This test exercises a corner case in which KVM fails to preserve the
STI interrupt shadow when a #SMI arrives during it.

Because the STI interrupt shadow apparently blocks real interrupts as well,
and thus prevents a vCPU kick from making the CPU enter SMM while the
shadow is active, a workaround is used:

an instruction which causes a VM exit anyway, but is retried by
KVM, is placed in the interrupt shadow.

While emulating such an instruction KVM doesn't reset the interrupt shadow
(because it retries the instruction), but it can notice the pending #SMI and
enter SMM, so the test verifies that the interrupt shadow is preserved in
this case.
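
In other words, the guest side of the test boils down to roughly the
following loop body (sketched from the test added below):

	send_smi = true;		/* ask vCPU1 to fire a #SMI at us */
	asm volatile("sti");		/* open a one-instruction interrupt shadow */
	asm volatile("shadow_label:");
	*(mem + (i << 12)) = 1;		/* touch a fresh page: VM exit that KVM retries */
	asm volatile("cli");
	/* the IPI handler records a failure if it ever runs at shadow_label */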

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 x86/Makefile.common  |   3 +-
 x86/Makefile.x86_64  |   1 +
 x86/smm_int_window.c | 125 +++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg    |   5 ++
 4 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 x86/smm_int_window.c

diff --git a/x86/Makefile.common b/x86/Makefile.common
index b7010e2f..ed5e5c76 100644
--- a/x86/Makefile.common
+++ b/x86/Makefile.common
@@ -85,7 +85,8 @@ tests-common = $(TEST_DIR)/vmexit.$(exe) $(TEST_DIR)/tsc.$(exe) \
                $(TEST_DIR)/tsx-ctrl.$(exe) \
                $(TEST_DIR)/eventinj.$(exe) \
                $(TEST_DIR)/smap.$(exe) \
-               $(TEST_DIR)/umip.$(exe)
+               $(TEST_DIR)/umip.$(exe) \
+               $(TEST_DIR)/smm_int_window.$(exe)
 
 # The following test cases are disabled when building EFI tests because they
 # use absolute addresses in their inline assembly code, which cannot compile
diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
index 8f9463cd..865da07d 100644
--- a/x86/Makefile.x86_64
+++ b/x86/Makefile.x86_64
@@ -34,6 +34,7 @@ tests += $(TEST_DIR)/rdpru.$(exe)
 tests += $(TEST_DIR)/pks.$(exe)
 tests += $(TEST_DIR)/pmu_lbr.$(exe)
 
+
 ifeq ($(CONFIG_EFI),y)
 tests += $(TEST_DIR)/amd_sev.$(exe)
 endif
diff --git a/x86/smm_int_window.c b/x86/smm_int_window.c
new file mode 100644
index 00000000..a8bc9888
--- /dev/null
+++ b/x86/smm_int_window.c
@@ -0,0 +1,125 @@
+#include "libcflat.h"
+#include "apic.h"
+#include "processor.h"
+#include "smp.h"
+#include "isr.h"
+#include "delay.h"
+#include "asm/barrier.h"
+#include "alloc_page.h"
+
+volatile int bad_int_received;
+
+extern unsigned long shadow_label;
+
+static void dummy_ipi_isr(isr_regs_t *regs)
+{
+	/* should never reach here */
+	if (regs->rip == (unsigned long)&shadow_label) {
+		bad_int_received++;
+	}
+	eoi();
+}
+
+
+#define SELF_INT_VECTOR 0xBB
+
+volatile bool test_ended;
+volatile bool send_smi;
+
+static void vcpu1_code(void *data)
+{
+	/*
+	 * Flood vCPU0 with #SMIs
+	 *
+	 * Note that kvm unit tests run with seabios and its #SMI handler
+	 * is only installed on vCPU0 (BSP).
+	 * Sending #SMI to any other CPU will crash the guest.
+	 *
+	 */
+	setup_vm();
+
+	while (!test_ended) {
+
+		if (send_smi) {
+			apic_icr_write(APIC_INT_ASSERT | APIC_DEST_PHYSICAL | APIC_DM_SMI, 0);
+			send_smi = false;
+		}
+		cpu_relax();
+	}
+}
+
+#define MEM_ALLOC_ORDER 16
+
+int main(void)
+{
+	int i;
+	unsigned volatile char *mem;
+
+	setup_vm();
+	cli();
+
+	mem = alloc_pages_flags(MEM_ALLOC_ORDER, AREA_ANY | FLAG_DONTZERO);
+	assert(mem);
+
+	handle_irq(SELF_INT_VECTOR, dummy_ipi_isr);
+	on_cpu_async(1, vcpu1_code, NULL);
+
+	for  (i = 0 ; i < (1 << MEM_ALLOC_ORDER) ; i++) {
+
+		apic_icr_write(APIC_INT_ASSERT | APIC_DEST_PHYSICAL | APIC_DM_FIXED | SELF_INT_VECTOR, 0);
+
+		/* in case the sender is still sending #SMI, wait for it*/
+		while (send_smi)
+			;
+
+		/* ask the peer vCPU to send SMI to us */
+		send_smi = true;
+
+		asm volatile("sti");
+		asm volatile("shadow_label:\n");
+
+		/*
+	 * The memory access below should never take an interrupt because
+	 * it is in the interrupt shadow created by the STI.
+	 *
+	 * Note that it seems that even if a real interrupt arrives, it
+	 * still does not interrupt this instruction; thus the vCPU kick
+	 * from vCPU1, when it attempts to send the #SMI to us, is not by
+	 * itself enough to trigger the switch to SMM at this point.
+	 * Therefore an STI;CLI sequence alone doesn't lead to the #SMI
+	 * being taken between these two instructions.
+	 *
+	 * So use an instruction that accesses fresh memory, which forces
+	 * a VM exit, and just before resuming the guest KVM might notice
+	 * the incoming #SMI and enter SMM with the interrupt shadow
+	 * still pending.
+	 *
+	 * Also note that a plain VM-exiting instruction like CPUID can't
+	 * be used here, because KVM itself will emulate it and clear the
+	 * interrupt shadow prior to entering SMM.
+	 *
+	 * Test that in this case the interrupt shadow is preserved,
+	 * which means that upon exit from the #SMI handler the
+	 * instruction should still not take the pending interrupt.
+		 */
+
+		*(mem+(i<<12)) = 1;
+
+		asm volatile("cli");
+
+		if (bad_int_received)
+			break;
+	}
+
+	test_ended = 1;
+
+	while (cpus_active() > 1)
+		cpu_relax();
+
+	if (bad_int_received)
+		report (0, "Unexpected interrupt received during interrupt shadow");
+	else
+		report(1, "Test passed");
+
+	return report_summary();
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index ed651850..db9bb3ac 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -456,3 +456,8 @@ file = cet.flat
 arch = x86_64
 smp = 2
 extra_params = -enable-kvm -m 2048 -cpu host
+
+[smm_int_window]
+file = smm_int_window.flat
+smp = 2
+extra_params = -machine smm=on -machine kernel-irqchip=on -m 2g
-- 
2.26.3



* [kvm-unit-tests PATCH 07/16] x86: Add a simple test for SYSENTER instruction.
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (5 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 06/16] x86: Add test for #SMI during interrupt window Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 19:25   ` Sean Christopherson
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test Maxim Levitsky
                   ` (8 subsequent siblings)
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

Run the test with Intel's vendor ID and in long mode, to exercise the
emulation of this instruction on AMD.
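
The heart of the test (excerpted from the diff below) programs the SYSENTER
MSRs before executing the instruction, and the CPL=0 target then checks that
RSP matches MSR_IA32_SYSENTER_ESP:

	wrmsr(MSR_IA32_SYSENTER_CS, gdt_index << 3);
	wrmsr(MSR_IA32_SYSENTER_ESP, 0xAAFFFFFFFF);
	wrmsr(MSR_IA32_SYSENTER_EIP, (uint64_t)sysenter_target);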

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 x86/Makefile.x86_64 |   2 +
 x86/sysenter.c      | 127 ++++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg   |   5 ++
 3 files changed, 134 insertions(+)
 create mode 100644 x86/sysenter.c

diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
index 865da07d..8ce53650 100644
--- a/x86/Makefile.x86_64
+++ b/x86/Makefile.x86_64
@@ -33,6 +33,7 @@ tests += $(TEST_DIR)/vmware_backdoors.$(exe)
 tests += $(TEST_DIR)/rdpru.$(exe)
 tests += $(TEST_DIR)/pks.$(exe)
 tests += $(TEST_DIR)/pmu_lbr.$(exe)
+tests += $(TEST_DIR)/sysenter.$(exe)
 
 
 ifeq ($(CONFIG_EFI),y)
@@ -60,3 +61,4 @@ $(TEST_DIR)/hyperv_clock.$(bin): $(TEST_DIR)/hyperv_clock.o
 $(TEST_DIR)/vmx.$(bin): $(TEST_DIR)/vmx_tests.o
 $(TEST_DIR)/svm.$(bin): $(TEST_DIR)/svm_tests.o
 $(TEST_DIR)/svm_npt.$(bin): $(TEST_DIR)/svm_npt.o
+$(TEST_DIR)/sysenter.o: CFLAGS += -Wa,-mintel64
diff --git a/x86/sysenter.c b/x86/sysenter.c
new file mode 100644
index 00000000..6c32fea4
--- /dev/null
+++ b/x86/sysenter.c
@@ -0,0 +1,127 @@
+#include "alloc.h"
+#include "libcflat.h"
+#include "processor.h"
+#include "msr.h"
+#include "desc.h"
+
+
+// undefine this to run the sysenter instruction in 64 bit mode.
+// this won't work on AMD due to disabled code in the emulator.
+#define COMP32
+
+int main(int ac, char **av)
+{
+    extern void sysenter_target(void);
+    extern void test_done(void);
+
+    setup_vm();
+
+    int gdt_index = 0x50 >> 3;
+    ulong rax = 0xDEAD;
+
+    /* init the sysenter GDT block */
+    /*gdt64[gdt_index+0] = gdt64[KERNEL_CS >> 3];
+    gdt64[gdt_index+1] = gdt64[KERNEL_DS >> 3];
+    gdt64[gdt_index+2] = gdt64[USER_CS >> 3];
+    gdt64[gdt_index+3] = gdt64[USER_DS >> 3];*/
+
+    /* init the sysenter msrs*/
+    wrmsr(MSR_IA32_SYSENTER_CS, gdt_index << 3);
+    wrmsr(MSR_IA32_SYSENTER_ESP, 0xAAFFFFFFFF);
+    wrmsr(MSR_IA32_SYSENTER_EIP, (uint64_t)sysenter_target);
+
+    u8 *thunk = (u8*)malloc(50);
+    u8 *tmp = thunk;
+
+    printf("Thunk at 0x%lx\n", (u64)thunk);
+
+    /* movabs test_done, %rdx*/
+    *tmp++ = 0x48; *tmp++ = 0xBA;
+    *(u64 *)tmp = (uint64_t)test_done; tmp += 8;
+    /* jmp %%rdx*/
+    *tmp++ = 0xFF; *tmp++ = 0xe2;
+
+    asm volatile (
+#ifdef COMP32
+        "# switch to comp32, mode prior to running the test\n"
+        "ljmpl *1f\n"
+        "1:\n"
+        ".long 1f\n"
+        ".long " xstr(KERNEL_CS32) "\n"
+        "1:\n"
+        ".code32\n"
+#else
+		"# store the 64 bit thunk address to rdx\n"
+		"mov %[thunk], %%rdx\n"
+#endif
+		"#+++++++++++++++++++++++++++++++++++++++++++++++++++"
+		"# user code (64 bit or comp32)"
+		"#+++++++++++++++++++++++++++++++++++++++++++++++++++"
+
+		"# use sysenter to enter 64 bit system code\n"
+        "mov %%esp, %%ecx #stash rsp value\n"
+        "mov $1, %%ebx\n"
+        "sysenter\n"
+        "ud2\n"
+
+		"#+++++++++++++++++++++++++++++++++++++++++++++++++++\n"
+        "# 64 bit cpl=0 code"
+		"#+++++++++++++++++++++++++++++++++++++++++++++++++++\n"
+
+        ".code64\n"
+		"sysenter_target:\n"
+
+#ifdef COMP32
+		"test %%rbx, %%rbx # check if we are here for second time \n"
+        "jne 1f\n"
+        "movq %%rcx, %%rsp # restore stack pointer manually\n"
+        "jmp test_done\n"
+        "1:\n"
+#endif
+
+		"# test that MSR_IA32_SYSENTER_ESP is correct\n"
+        "movq $0xAAFFFFFFFF, %%rbx\n"
+        "movq $0xDEAD, %%rax\n"
+        "cmpq %%rsp, %%rbx \n"
+        "jne 1f\n"
+        "movq $0xACED, %%rax\n"
+
+        "# use sysexit to exit back\n"
+        "1:\n"
+#ifdef COMP32
+		"leaq sysexit_target, %%rdx\n"
+        "sysexit\n"
+        "sysexit_target:\n"
+		"# second sysenter to return to CPL=0 and 64 bit\n"
+        "# the sysenter handler will jump back to here without sysexit due to ebx=0\n"
+        ".code32\n"
+		"mov $0, %%ebx\n"
+        "sysenter\n"
+#else
+		"# this will go through thunk to test_done, which tests,\n"
+		"# that we can sysexit to high addresses\n"
+		".byte 0x48\n"
+        "sysexit\n"
+        "ud2\n"
+#endif
+
+		".code64\n"
+        "test_done:\n"
+		"nop\n"
+
+        : /*outputs*/
+        "=a" (rax)
+        : /* inputs*/
+		[thunk] "r" (thunk)
+        : /*clobbers*/
+        "rbx",  /* action flag for sysenter_target */
+        "rcx",  /* saved RSP */
+        "rdx",  /* used for SYSEXIT*/
+        "flags"
+     );
+
+    report(rax == 0xACED, "MSR_IA32_SYSENTER_ESP has correct value");
+    return report_summary();
+}
+
+
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index db9bb3ac..ebb3fdfc 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -223,6 +223,11 @@ file = syscall.flat
 arch = x86_64
 extra_params = -cpu Opteron_G1,vendor=AuthenticAMD
 
+[sysenter]
+file = sysenter.flat
+arch = x86_64
+extra_params = -cpu host,vendor=GenuineIntel
+
 [tsc]
 file = tsc.flat
 extra_params = -cpu kvm64,+rdtscp
-- 
2.26.3



* [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test.
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (6 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 07/16] x86: Add a simple test for SYSENTER instruction Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 15:26   ` Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 09/16] svm: move svm spec definitions to lib/x86/svm.h Maxim Levitsky
                   ` (7 subsequent siblings)
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

Test that if L2 triggers a shutdown, the resulting VM exit is delivered
to L1 and doesn't crash the host.
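
The mechanism, sketched from the first case of the test below: break the
guest IDT so that any exception escalates to a triple fault, intercept the
resulting shutdown in L1, and check the exit code:

	test_set_guest(shutdown_intercept_test_guest);	/* guest executes int3 */
	vmcb->save.idtr.base = (u64)unmapped_address;	/* #BP -> #PF -> #DF -> triple fault */
	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
	svm_vmrun();
	report(vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown intercepted by L1");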

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 x86/svm_tests.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index 19b35e95..2c29c2b0 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -10,6 +10,7 @@
 #include "isr.h"
 #include "apic.h"
 #include "delay.h"
+#include "vmalloc.h"
 
 #define SVM_EXIT_MAX_DR_INTERCEPT 0x3f
 
@@ -3270,6 +3271,55 @@ static void svm_intr_intercept_mix_smi(void)
 	svm_intr_intercept_mix_run_guest(NULL, SVM_EXIT_SMI);
 }
 
+
+static void shutdown_intercept_test_guest(struct svm_test *test)
+{
+	asm volatile ("int3");
+	report_fail("should not reach here\n");
+
+}
+
+static void shutdown_intercept_test_guest2(struct svm_test *test)
+{
+	asm volatile ("ud2");
+	report_fail("should not reach here\n");
+
+}
+
+static void svm_shutdown_intercept_test(void)
+{
+	void* unmapped_address = alloc_vpage();
+
+	/*
+	 * Test that shutdown vm exit doesn't crash L0
+	 *
+	 * Test both native and emulated triple fault
+	 * (due to exception merging)
+	 */
+
+
+	/*
+	 * This will usually cause native SVM_EXIT_SHUTDOWN
+	 * (KVM usually doesn't intercept #PF)
+	 */
+	test_set_guest(shutdown_intercept_test_guest);
+	vmcb->save.idtr.base = (u64)unmapped_address;
+	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
+	svm_vmrun();
+	report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (BP->PF->DF->TRIPLE_FAULT) test passed");
+
+	/*
+	 * This will usually cause emulated SVM_EXIT_SHUTDOWN
+	 * (KVM usually intercepts #UD)
+	 */
+	test_set_guest(shutdown_intercept_test_guest2);
+	vmcb_ident(vmcb);
+	vmcb->save.idtr.limit = 0;
+	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
+	svm_vmrun();
+	report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (UD->DF->TRIPLE_FAULT) test passed");
+}
+
 struct svm_test svm_tests[] = {
 	{ "null", default_supported, default_prepare,
 	  default_prepare_gif_clear, null_test,
@@ -3382,6 +3432,7 @@ struct svm_test svm_tests[] = {
 	TEST(svm_intr_intercept_mix_smi),
 	TEST(svm_tsc_scale_test),
 	TEST(pause_filter_test),
+	TEST(svm_shutdown_intercept_test),
 	{ NULL, NULL, NULL, NULL, NULL, NULL, NULL }
 };
 
-- 
2.26.3



* [kvm-unit-tests PATCH 09/16] svm: move svm spec definitions to lib/x86/svm.h
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (7 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 19:08   ` Sean Christopherson
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 10/16] svm: move some svm support functions into lib/x86/svm_lib.h Maxim Levitsky
                   ` (6 subsequent siblings)
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

This is the first step of separating the SVM code into a library.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/svm.h | 364 ++++++++++++++++++++++++++++++++++++++++++++++++++
 x86/svm.h     | 359 +------------------------------------------------
 2 files changed, 365 insertions(+), 358 deletions(-)
 create mode 100644 lib/x86/svm.h

diff --git a/lib/x86/svm.h b/lib/x86/svm.h
new file mode 100644
index 00000000..df57122e
--- /dev/null
+++ b/lib/x86/svm.h
@@ -0,0 +1,364 @@
+
+#ifndef SRC_LIB_X86_SVM_H_
+#define SRC_LIB_X86_SVM_H_
+
+enum {
+	INTERCEPT_INTR,
+	INTERCEPT_NMI,
+	INTERCEPT_SMI,
+	INTERCEPT_INIT,
+	INTERCEPT_VINTR,
+	INTERCEPT_SELECTIVE_CR0,
+	INTERCEPT_STORE_IDTR,
+	INTERCEPT_STORE_GDTR,
+	INTERCEPT_STORE_LDTR,
+	INTERCEPT_STORE_TR,
+	INTERCEPT_LOAD_IDTR,
+	INTERCEPT_LOAD_GDTR,
+	INTERCEPT_LOAD_LDTR,
+	INTERCEPT_LOAD_TR,
+	INTERCEPT_RDTSC,
+	INTERCEPT_RDPMC,
+	INTERCEPT_PUSHF,
+	INTERCEPT_POPF,
+	INTERCEPT_CPUID,
+	INTERCEPT_RSM,
+	INTERCEPT_IRET,
+	INTERCEPT_INTn,
+	INTERCEPT_INVD,
+	INTERCEPT_PAUSE,
+	INTERCEPT_HLT,
+	INTERCEPT_INVLPG,
+	INTERCEPT_INVLPGA,
+	INTERCEPT_IOIO_PROT,
+	INTERCEPT_MSR_PROT,
+	INTERCEPT_TASK_SWITCH,
+	INTERCEPT_FERR_FREEZE,
+	INTERCEPT_SHUTDOWN,
+	INTERCEPT_VMRUN,
+	INTERCEPT_VMMCALL,
+	INTERCEPT_VMLOAD,
+	INTERCEPT_VMSAVE,
+	INTERCEPT_STGI,
+	INTERCEPT_CLGI,
+	INTERCEPT_SKINIT,
+	INTERCEPT_RDTSCP,
+	INTERCEPT_ICEBP,
+	INTERCEPT_WBINVD,
+	INTERCEPT_MONITOR,
+	INTERCEPT_MWAIT,
+	INTERCEPT_MWAIT_COND,
+};
+
+enum {
+		VMCB_CLEAN_INTERCEPTS = 1, /* Intercept vectors, TSC offset, pause filter count */
+		VMCB_CLEAN_PERM_MAP = 2,   /* IOPM Base and MSRPM Base */
+		VMCB_CLEAN_ASID = 4,	   /* ASID */
+		VMCB_CLEAN_INTR = 8,	   /* int_ctl, int_vector */
+		VMCB_CLEAN_NPT = 16,	   /* npt_en, nCR3, gPAT */
+		VMCB_CLEAN_CR = 32,		/* CR0, CR3, CR4, EFER */
+		VMCB_CLEAN_DR = 64,		/* DR6, DR7 */
+		VMCB_CLEAN_DT = 128,	   /* GDT, IDT */
+		VMCB_CLEAN_SEG = 256,	  /* CS, DS, SS, ES, CPL */
+		VMCB_CLEAN_CR2 = 512,	  /* CR2 only */
+		VMCB_CLEAN_LBR = 1024,	 /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
+		VMCB_CLEAN_AVIC = 2048,	/* APIC_BAR, APIC_BACKING_PAGE,
+					  PHYSICAL_TABLE pointer, LOGICAL_TABLE pointer */
+		VMCB_CLEAN_ALL = 4095,
+};
+
+struct __attribute__ ((__packed__)) vmcb_control_area {
+	u16 intercept_cr_read;
+	u16 intercept_cr_write;
+	u16 intercept_dr_read;
+	u16 intercept_dr_write;
+	u32 intercept_exceptions;
+	u64 intercept;
+	u8 reserved_1[40];
+	u16 pause_filter_thresh;
+	u16 pause_filter_count;
+	u64 iopm_base_pa;
+	u64 msrpm_base_pa;
+	u64 tsc_offset;
+	u32 asid;
+	u8 tlb_ctl;
+	u8 reserved_2[3];
+	u32 int_ctl;
+	u32 int_vector;
+	u32 int_state;
+	u8 reserved_3[4];
+	u32 exit_code;
+	u32 exit_code_hi;
+	u64 exit_info_1;
+	u64 exit_info_2;
+	u32 exit_int_info;
+	u32 exit_int_info_err;
+	u64 nested_ctl;
+	u8 reserved_4[16];
+	u32 event_inj;
+	u32 event_inj_err;
+	u64 nested_cr3;
+	u64 virt_ext;
+	u32 clean;
+	u32 reserved_5;
+	u64 next_rip;
+	u8 insn_len;
+	u8 insn_bytes[15];
+	u8 reserved_6[800];
+};
+
+#define TLB_CONTROL_DO_NOTHING 0
+#define TLB_CONTROL_FLUSH_ALL_ASID 1
+
+#define V_TPR_MASK 0x0f
+
+#define V_IRQ_SHIFT 8
+#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+
+#define V_GIF_ENABLED_SHIFT 25
+#define V_GIF_ENABLED_MASK (1 << V_GIF_ENABLED_SHIFT)
+
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
+#define V_INTR_PRIO_SHIFT 16
+#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
+
+#define V_IGN_TPR_SHIFT 20
+#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+
+#define V_INTR_MASKING_SHIFT 24
+#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+
+#define SVM_INTERRUPT_SHADOW_MASK 1
+
+#define SVM_IOIO_STR_SHIFT 2
+#define SVM_IOIO_REP_SHIFT 3
+#define SVM_IOIO_SIZE_SHIFT 4
+#define SVM_IOIO_ASIZE_SHIFT 7
+
+#define SVM_IOIO_TYPE_MASK 1
+#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
+#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
+#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
+#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
+
+#define SVM_VM_CR_VALID_MASK	0x001fULL
+#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
+#define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL
+
+#define TSC_RATIO_DEFAULT   0x0100000000ULL
+
+struct __attribute__ ((__packed__)) vmcb_seg {
+	u16 selector;
+	u16 attrib;
+	u32 limit;
+	u64 base;
+};
+
+struct __attribute__ ((__packed__)) vmcb_save_area {
+	struct vmcb_seg es;
+	struct vmcb_seg cs;
+	struct vmcb_seg ss;
+	struct vmcb_seg ds;
+	struct vmcb_seg fs;
+	struct vmcb_seg gs;
+	struct vmcb_seg gdtr;
+	struct vmcb_seg ldtr;
+	struct vmcb_seg idtr;
+	struct vmcb_seg tr;
+	u8 reserved_1[43];
+	u8 cpl;
+	u8 reserved_2[4];
+	u64 efer;
+	u8 reserved_3[112];
+	u64 cr4;
+	u64 cr3;
+	u64 cr0;
+	u64 dr7;
+	u64 dr6;
+	u64 rflags;
+	u64 rip;
+	u8 reserved_4[88];
+	u64 rsp;
+	u8 reserved_5[24];
+	u64 rax;
+	u64 star;
+	u64 lstar;
+	u64 cstar;
+	u64 sfmask;
+	u64 kernel_gs_base;
+	u64 sysenter_cs;
+	u64 sysenter_esp;
+	u64 sysenter_eip;
+	u64 cr2;
+	u8 reserved_6[32];
+	u64 g_pat;
+	u64 dbgctl;
+	u64 br_from;
+	u64 br_to;
+	u64 last_excp_from;
+	u64 last_excp_to;
+};
+
+struct __attribute__ ((__packed__)) vmcb {
+	struct vmcb_control_area control;
+	struct vmcb_save_area save;
+};
+
+#define SVM_CPUID_FEATURE_SHIFT 2
+#define SVM_CPUID_FUNC 0x8000000a
+
+#define SVM_VM_CR_SVM_DISABLE 4
+
+#define SVM_SELECTOR_S_SHIFT 4
+#define SVM_SELECTOR_DPL_SHIFT 5
+#define SVM_SELECTOR_P_SHIFT 7
+#define SVM_SELECTOR_AVL_SHIFT 8
+#define SVM_SELECTOR_L_SHIFT 9
+#define SVM_SELECTOR_DB_SHIFT 10
+#define SVM_SELECTOR_G_SHIFT 11
+
+#define SVM_SELECTOR_TYPE_MASK (0xf)
+#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
+#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
+#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
+#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
+#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
+#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
+#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
+
+#define SVM_SELECTOR_WRITE_MASK (1 << 1)
+#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
+#define SVM_SELECTOR_CODE_MASK (1 << 3)
+
+#define INTERCEPT_CR0_MASK 1
+#define INTERCEPT_CR3_MASK (1 << 3)
+#define INTERCEPT_CR4_MASK (1 << 4)
+#define INTERCEPT_CR8_MASK (1 << 8)
+
+#define INTERCEPT_DR0_MASK 1
+#define INTERCEPT_DR1_MASK (1 << 1)
+#define INTERCEPT_DR2_MASK (1 << 2)
+#define INTERCEPT_DR3_MASK (1 << 3)
+#define INTERCEPT_DR4_MASK (1 << 4)
+#define INTERCEPT_DR5_MASK (1 << 5)
+#define INTERCEPT_DR6_MASK (1 << 6)
+#define INTERCEPT_DR7_MASK (1 << 7)
+
+#define SVM_EVTINJ_VEC_MASK 0xff
+
+#define SVM_EVTINJ_TYPE_SHIFT 8
+#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
+#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
+
+#define SVM_EVTINJ_VALID (1 << 31)
+#define SVM_EVTINJ_VALID_ERR (1 << 11)
+
+#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
+
+#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
+#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
+#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
+#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
+
+#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
+#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
+
+#define SVM_EXIT_READ_CR0   0x000
+#define SVM_EXIT_READ_CR3   0x003
+#define SVM_EXIT_READ_CR4   0x004
+#define SVM_EXIT_READ_CR8   0x008
+#define SVM_EXIT_WRITE_CR0  0x010
+#define SVM_EXIT_WRITE_CR3  0x013
+#define SVM_EXIT_WRITE_CR4  0x014
+#define SVM_EXIT_WRITE_CR8  0x018
+#define SVM_EXIT_READ_DR0   0x020
+#define SVM_EXIT_READ_DR1   0x021
+#define SVM_EXIT_READ_DR2   0x022
+#define SVM_EXIT_READ_DR3   0x023
+#define SVM_EXIT_READ_DR4   0x024
+#define SVM_EXIT_READ_DR5   0x025
+#define SVM_EXIT_READ_DR6   0x026
+#define SVM_EXIT_READ_DR7   0x027
+#define SVM_EXIT_WRITE_DR0  0x030
+#define SVM_EXIT_WRITE_DR1  0x031
+#define SVM_EXIT_WRITE_DR2  0x032
+#define SVM_EXIT_WRITE_DR3  0x033
+#define SVM_EXIT_WRITE_DR4  0x034
+#define SVM_EXIT_WRITE_DR5  0x035
+#define SVM_EXIT_WRITE_DR6  0x036
+#define SVM_EXIT_WRITE_DR7  0x037
+#define SVM_EXIT_EXCP_BASE	  0x040
+#define SVM_EXIT_INTR	   0x060
+#define SVM_EXIT_NMI		0x061
+#define SVM_EXIT_SMI		0x062
+#define SVM_EXIT_INIT	   0x063
+#define SVM_EXIT_VINTR	  0x064
+#define SVM_EXIT_CR0_SEL_WRITE  0x065
+#define SVM_EXIT_IDTR_READ  0x066
+#define SVM_EXIT_GDTR_READ  0x067
+#define SVM_EXIT_LDTR_READ  0x068
+#define SVM_EXIT_TR_READ	0x069
+#define SVM_EXIT_IDTR_WRITE 0x06a
+#define SVM_EXIT_GDTR_WRITE 0x06b
+#define SVM_EXIT_LDTR_WRITE 0x06c
+#define SVM_EXIT_TR_WRITE   0x06d
+#define SVM_EXIT_RDTSC	  0x06e
+#define SVM_EXIT_RDPMC	  0x06f
+#define SVM_EXIT_PUSHF	  0x070
+#define SVM_EXIT_POPF	   0x071
+#define SVM_EXIT_CPUID	  0x072
+#define SVM_EXIT_RSM		0x073
+#define SVM_EXIT_IRET	   0x074
+#define SVM_EXIT_SWINT	  0x075
+#define SVM_EXIT_INVD	   0x076
+#define SVM_EXIT_PAUSE	  0x077
+#define SVM_EXIT_HLT		0x078
+#define SVM_EXIT_INVLPG	 0x079
+#define SVM_EXIT_INVLPGA	0x07a
+#define SVM_EXIT_IOIO	   0x07b
+#define SVM_EXIT_MSR		0x07c
+#define SVM_EXIT_TASK_SWITCH	0x07d
+#define SVM_EXIT_FERR_FREEZE	0x07e
+#define SVM_EXIT_SHUTDOWN   0x07f
+#define SVM_EXIT_VMRUN	  0x080
+#define SVM_EXIT_VMMCALL	0x081
+#define SVM_EXIT_VMLOAD	 0x082
+#define SVM_EXIT_VMSAVE	 0x083
+#define SVM_EXIT_STGI	   0x084
+#define SVM_EXIT_CLGI	   0x085
+#define SVM_EXIT_SKINIT	 0x086
+#define SVM_EXIT_RDTSCP	 0x087
+#define SVM_EXIT_ICEBP	  0x088
+#define SVM_EXIT_WBINVD	 0x089
+#define SVM_EXIT_MONITOR	0x08a
+#define SVM_EXIT_MWAIT	  0x08b
+#define SVM_EXIT_MWAIT_COND 0x08c
+#define SVM_EXIT_NPF		0x400
+
+#define SVM_EXIT_ERR		-1
+
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+
+#define SVM_CR0_RESERVED_MASK			0xffffffff00000000U
+#define SVM_CR3_LONG_MBZ_MASK			0xfff0000000000000U
+#define SVM_CR3_LONG_RESERVED_MASK		0x0000000000000fe7U
+#define SVM_CR3_PAE_LEGACY_RESERVED_MASK	0x0000000000000007U
+#define SVM_CR4_LEGACY_RESERVED_MASK		0xff08e000U
+#define SVM_CR4_RESERVED_MASK			0xffffffffff08e000U
+#define SVM_DR6_RESERVED_MASK			0xffffffffffff1ff0U
+#define SVM_DR7_RESERVED_MASK			0xffffffff0000cc00U
+#define SVM_EFER_RESERVED_MASK			0xffffffffffff0200U
+
+
+#endif /* SRC_LIB_X86_SVM_H_ */
diff --git a/x86/svm.h b/x86/svm.h
index 766ff7e3..73800bc7 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -2,367 +2,10 @@
 #define X86_SVM_H
 
 #include "libcflat.h"
+#include <x86/svm.h>
 
-enum {
-	INTERCEPT_INTR,
-	INTERCEPT_NMI,
-	INTERCEPT_SMI,
-	INTERCEPT_INIT,
-	INTERCEPT_VINTR,
-	INTERCEPT_SELECTIVE_CR0,
-	INTERCEPT_STORE_IDTR,
-	INTERCEPT_STORE_GDTR,
-	INTERCEPT_STORE_LDTR,
-	INTERCEPT_STORE_TR,
-	INTERCEPT_LOAD_IDTR,
-	INTERCEPT_LOAD_GDTR,
-	INTERCEPT_LOAD_LDTR,
-	INTERCEPT_LOAD_TR,
-	INTERCEPT_RDTSC,
-	INTERCEPT_RDPMC,
-	INTERCEPT_PUSHF,
-	INTERCEPT_POPF,
-	INTERCEPT_CPUID,
-	INTERCEPT_RSM,
-	INTERCEPT_IRET,
-	INTERCEPT_INTn,
-	INTERCEPT_INVD,
-	INTERCEPT_PAUSE,
-	INTERCEPT_HLT,
-	INTERCEPT_INVLPG,
-	INTERCEPT_INVLPGA,
-	INTERCEPT_IOIO_PROT,
-	INTERCEPT_MSR_PROT,
-	INTERCEPT_TASK_SWITCH,
-	INTERCEPT_FERR_FREEZE,
-	INTERCEPT_SHUTDOWN,
-	INTERCEPT_VMRUN,
-	INTERCEPT_VMMCALL,
-	INTERCEPT_VMLOAD,
-	INTERCEPT_VMSAVE,
-	INTERCEPT_STGI,
-	INTERCEPT_CLGI,
-	INTERCEPT_SKINIT,
-	INTERCEPT_RDTSCP,
-	INTERCEPT_ICEBP,
-	INTERCEPT_WBINVD,
-	INTERCEPT_MONITOR,
-	INTERCEPT_MWAIT,
-	INTERCEPT_MWAIT_COND,
-};
-
-enum {
-        VMCB_CLEAN_INTERCEPTS = 1, /* Intercept vectors, TSC offset, pause filter count */
-        VMCB_CLEAN_PERM_MAP = 2,   /* IOPM Base and MSRPM Base */
-        VMCB_CLEAN_ASID = 4,       /* ASID */
-        VMCB_CLEAN_INTR = 8,       /* int_ctl, int_vector */
-        VMCB_CLEAN_NPT = 16,       /* npt_en, nCR3, gPAT */
-        VMCB_CLEAN_CR = 32,        /* CR0, CR3, CR4, EFER */
-        VMCB_CLEAN_DR = 64,        /* DR6, DR7 */
-        VMCB_CLEAN_DT = 128,       /* GDT, IDT */
-        VMCB_CLEAN_SEG = 256,      /* CS, DS, SS, ES, CPL */
-        VMCB_CLEAN_CR2 = 512,      /* CR2 only */
-        VMCB_CLEAN_LBR = 1024,     /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
-        VMCB_CLEAN_AVIC = 2048,    /* APIC_BAR, APIC_BACKING_PAGE,
-				      PHYSICAL_TABLE pointer, LOGICAL_TABLE pointer */
-        VMCB_CLEAN_ALL = 4095,
-};
-
-struct __attribute__ ((__packed__)) vmcb_control_area {
-	u16 intercept_cr_read;
-	u16 intercept_cr_write;
-	u16 intercept_dr_read;
-	u16 intercept_dr_write;
-	u32 intercept_exceptions;
-	u64 intercept;
-	u8 reserved_1[40];
-	u16 pause_filter_thresh;
-	u16 pause_filter_count;
-	u64 iopm_base_pa;
-	u64 msrpm_base_pa;
-	u64 tsc_offset;
-	u32 asid;
-	u8 tlb_ctl;
-	u8 reserved_2[3];
-	u32 int_ctl;
-	u32 int_vector;
-	u32 int_state;
-	u8 reserved_3[4];
-	u32 exit_code;
-	u32 exit_code_hi;
-	u64 exit_info_1;
-	u64 exit_info_2;
-	u32 exit_int_info;
-	u32 exit_int_info_err;
-	u64 nested_ctl;
-	u8 reserved_4[16];
-	u32 event_inj;
-	u32 event_inj_err;
-	u64 nested_cr3;
-	u64 virt_ext;
-	u32 clean;
-	u32 reserved_5;
-	u64 next_rip;
-	u8 insn_len;
-	u8 insn_bytes[15];
-	u8 reserved_6[800];
-};
-
-#define TLB_CONTROL_DO_NOTHING 0
-#define TLB_CONTROL_FLUSH_ALL_ASID 1
-
-#define V_TPR_MASK 0x0f
-
-#define V_IRQ_SHIFT 8
-#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
-
-#define V_GIF_ENABLED_SHIFT 25
-#define V_GIF_ENABLED_MASK (1 << V_GIF_ENABLED_SHIFT)
-
-#define V_GIF_SHIFT 9
-#define V_GIF_MASK (1 << V_GIF_SHIFT)
-
-#define V_INTR_PRIO_SHIFT 16
-#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
-
-#define V_IGN_TPR_SHIFT 20
-#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
-
-#define V_INTR_MASKING_SHIFT 24
-#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
-
-#define SVM_INTERRUPT_SHADOW_MASK 1
-
-#define SVM_IOIO_STR_SHIFT 2
-#define SVM_IOIO_REP_SHIFT 3
-#define SVM_IOIO_SIZE_SHIFT 4
-#define SVM_IOIO_ASIZE_SHIFT 7
-
-#define SVM_IOIO_TYPE_MASK 1
-#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
-#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
-#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
-#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
-
-#define SVM_VM_CR_VALID_MASK	0x001fULL
-#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
-#define SVM_VM_CR_SVM_DIS_MASK  0x0010ULL
-
-#define TSC_RATIO_DEFAULT   0x0100000000ULL
-
-struct __attribute__ ((__packed__)) vmcb_seg {
-	u16 selector;
-	u16 attrib;
-	u32 limit;
-	u64 base;
-};
-
-struct __attribute__ ((__packed__)) vmcb_save_area {
-	struct vmcb_seg es;
-	struct vmcb_seg cs;
-	struct vmcb_seg ss;
-	struct vmcb_seg ds;
-	struct vmcb_seg fs;
-	struct vmcb_seg gs;
-	struct vmcb_seg gdtr;
-	struct vmcb_seg ldtr;
-	struct vmcb_seg idtr;
-	struct vmcb_seg tr;
-	u8 reserved_1[43];
-	u8 cpl;
-	u8 reserved_2[4];
-	u64 efer;
-	u8 reserved_3[112];
-	u64 cr4;
-	u64 cr3;
-	u64 cr0;
-	u64 dr7;
-	u64 dr6;
-	u64 rflags;
-	u64 rip;
-	u8 reserved_4[88];
-	u64 rsp;
-	u8 reserved_5[24];
-	u64 rax;
-	u64 star;
-	u64 lstar;
-	u64 cstar;
-	u64 sfmask;
-	u64 kernel_gs_base;
-	u64 sysenter_cs;
-	u64 sysenter_esp;
-	u64 sysenter_eip;
-	u64 cr2;
-	u8 reserved_6[32];
-	u64 g_pat;
-	u64 dbgctl;
-	u64 br_from;
-	u64 br_to;
-	u64 last_excp_from;
-	u64 last_excp_to;
-};
-
-struct __attribute__ ((__packed__)) vmcb {
-	struct vmcb_control_area control;
-	struct vmcb_save_area save;
-};
-
-#define SVM_CPUID_FEATURE_SHIFT 2
-#define SVM_CPUID_FUNC 0x8000000a
-
-#define SVM_VM_CR_SVM_DISABLE 4
-
-#define SVM_SELECTOR_S_SHIFT 4
-#define SVM_SELECTOR_DPL_SHIFT 5
-#define SVM_SELECTOR_P_SHIFT 7
-#define SVM_SELECTOR_AVL_SHIFT 8
-#define SVM_SELECTOR_L_SHIFT 9
-#define SVM_SELECTOR_DB_SHIFT 10
-#define SVM_SELECTOR_G_SHIFT 11
-
-#define SVM_SELECTOR_TYPE_MASK (0xf)
-#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
-#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
-#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
-#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
-#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
-#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
-#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
-
-#define SVM_SELECTOR_WRITE_MASK (1 << 1)
-#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
-#define SVM_SELECTOR_CODE_MASK (1 << 3)
-
-#define INTERCEPT_CR0_MASK 1
-#define INTERCEPT_CR3_MASK (1 << 3)
-#define INTERCEPT_CR4_MASK (1 << 4)
-#define INTERCEPT_CR8_MASK (1 << 8)
-
-#define INTERCEPT_DR0_MASK 1
-#define INTERCEPT_DR1_MASK (1 << 1)
-#define INTERCEPT_DR2_MASK (1 << 2)
-#define INTERCEPT_DR3_MASK (1 << 3)
-#define INTERCEPT_DR4_MASK (1 << 4)
-#define INTERCEPT_DR5_MASK (1 << 5)
-#define INTERCEPT_DR6_MASK (1 << 6)
-#define INTERCEPT_DR7_MASK (1 << 7)
-
-#define SVM_EVTINJ_VEC_MASK 0xff
-
-#define SVM_EVTINJ_TYPE_SHIFT 8
-#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
-#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
-
-#define SVM_EVTINJ_VALID (1 << 31)
-#define SVM_EVTINJ_VALID_ERR (1 << 11)
-
-#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
-#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
-
-#define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
-#define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
-#define	SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
-#define	SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
-
-#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
-#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
-
-#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
-#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
-#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
-
-#define	SVM_EXIT_READ_CR0 	0x000
-#define	SVM_EXIT_READ_CR3 	0x003
-#define	SVM_EXIT_READ_CR4 	0x004
-#define	SVM_EXIT_READ_CR8 	0x008
-#define	SVM_EXIT_WRITE_CR0 	0x010
-#define	SVM_EXIT_WRITE_CR3 	0x013
-#define	SVM_EXIT_WRITE_CR4 	0x014
-#define	SVM_EXIT_WRITE_CR8 	0x018
-#define	SVM_EXIT_READ_DR0 	0x020
-#define	SVM_EXIT_READ_DR1 	0x021
-#define	SVM_EXIT_READ_DR2 	0x022
-#define	SVM_EXIT_READ_DR3 	0x023
-#define	SVM_EXIT_READ_DR4 	0x024
-#define	SVM_EXIT_READ_DR5 	0x025
-#define	SVM_EXIT_READ_DR6 	0x026
-#define	SVM_EXIT_READ_DR7 	0x027
-#define	SVM_EXIT_WRITE_DR0 	0x030
-#define	SVM_EXIT_WRITE_DR1 	0x031
-#define	SVM_EXIT_WRITE_DR2 	0x032
-#define	SVM_EXIT_WRITE_DR3 	0x033
-#define	SVM_EXIT_WRITE_DR4 	0x034
-#define	SVM_EXIT_WRITE_DR5 	0x035
-#define	SVM_EXIT_WRITE_DR6 	0x036
-#define	SVM_EXIT_WRITE_DR7 	0x037
-#define SVM_EXIT_EXCP_BASE      0x040
-#define SVM_EXIT_INTR		0x060
-#define SVM_EXIT_NMI		0x061
-#define SVM_EXIT_SMI		0x062
-#define SVM_EXIT_INIT		0x063
-#define SVM_EXIT_VINTR		0x064
-#define SVM_EXIT_CR0_SEL_WRITE	0x065
-#define SVM_EXIT_IDTR_READ	0x066
-#define SVM_EXIT_GDTR_READ	0x067
-#define SVM_EXIT_LDTR_READ	0x068
-#define SVM_EXIT_TR_READ	0x069
-#define SVM_EXIT_IDTR_WRITE	0x06a
-#define SVM_EXIT_GDTR_WRITE	0x06b
-#define SVM_EXIT_LDTR_WRITE	0x06c
-#define SVM_EXIT_TR_WRITE	0x06d
-#define SVM_EXIT_RDTSC		0x06e
-#define SVM_EXIT_RDPMC		0x06f
-#define SVM_EXIT_PUSHF		0x070
-#define SVM_EXIT_POPF		0x071
-#define SVM_EXIT_CPUID		0x072
-#define SVM_EXIT_RSM		0x073
-#define SVM_EXIT_IRET		0x074
-#define SVM_EXIT_SWINT		0x075
-#define SVM_EXIT_INVD		0x076
-#define SVM_EXIT_PAUSE		0x077
-#define SVM_EXIT_HLT		0x078
-#define SVM_EXIT_INVLPG		0x079
-#define SVM_EXIT_INVLPGA	0x07a
-#define SVM_EXIT_IOIO		0x07b
-#define SVM_EXIT_MSR		0x07c
-#define SVM_EXIT_TASK_SWITCH	0x07d
-#define SVM_EXIT_FERR_FREEZE	0x07e
-#define SVM_EXIT_SHUTDOWN	0x07f
-#define SVM_EXIT_VMRUN		0x080
-#define SVM_EXIT_VMMCALL	0x081
-#define SVM_EXIT_VMLOAD		0x082
-#define SVM_EXIT_VMSAVE		0x083
-#define SVM_EXIT_STGI		0x084
-#define SVM_EXIT_CLGI		0x085
-#define SVM_EXIT_SKINIT		0x086
-#define SVM_EXIT_RDTSCP		0x087
-#define SVM_EXIT_ICEBP		0x088
-#define SVM_EXIT_WBINVD		0x089
-#define SVM_EXIT_MONITOR	0x08a
-#define SVM_EXIT_MWAIT		0x08b
-#define SVM_EXIT_MWAIT_COND	0x08c
-#define SVM_EXIT_NPF  		0x400
-
-#define SVM_EXIT_ERR		-1
-
-#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
-
-#define	SVM_CR0_RESERVED_MASK			0xffffffff00000000U
-#define	SVM_CR3_LONG_MBZ_MASK			0xfff0000000000000U
-#define	SVM_CR3_LONG_RESERVED_MASK		0x0000000000000fe7U
-#define SVM_CR3_PAE_LEGACY_RESERVED_MASK	0x0000000000000007U
-#define	SVM_CR4_LEGACY_RESERVED_MASK		0xff08e000U
-#define	SVM_CR4_RESERVED_MASK			0xffffffffff08e000U
-#define	SVM_DR6_RESERVED_MASK			0xffffffffffff1ff0U
-#define	SVM_DR7_RESERVED_MASK			0xffffffff0000cc00U
-#define	SVM_EFER_RESERVED_MASK			0xffffffffffff0200U
 
 #define MSR_BITMAP_SIZE 8192
-
 #define LBR_CTL_ENABLE_MASK BIT_ULL(0)
 
 struct svm_test {
-- 
2.26.3



* [kvm-unit-tests PATCH 10/16] svm: move some svm support functions into lib/x86/svm_lib.h
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (8 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 09/16] svm: move svm spec definitions to lib/x86/svm.h Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 11/16] svm: add svm_supported Maxim Levitsky
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/svm_lib.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 x86/svm.c         | 36 +-------------------------------
 x86/svm.h         | 18 ----------------
 x86/svm_npt.c     |  1 +
 x86/svm_tests.c   |  1 +
 5 files changed, 56 insertions(+), 53 deletions(-)
 create mode 100644 lib/x86/svm_lib.h

diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
new file mode 100644
index 00000000..04910281
--- /dev/null
+++ b/lib/x86/svm_lib.h
@@ -0,0 +1,53 @@
+#ifndef SRC_LIB_X86_SVM_LIB_H_
+#define SRC_LIB_X86_SVM_LIB_H_
+
+#include <x86/svm.h>
+#include "processor.h"
+
+static inline bool npt_supported(void)
+{
+	return this_cpu_has(X86_FEATURE_NPT);
+}
+
+static inline bool vgif_supported(void)
+{
+	return this_cpu_has(X86_FEATURE_VGIF);
+}
+
+static inline bool lbrv_supported(void)
+{
+	return this_cpu_has(X86_FEATURE_LBRV);
+}
+
+static inline bool tsc_scale_supported(void)
+{
+	return this_cpu_has(X86_FEATURE_TSCRATEMSR);
+}
+
+static inline bool pause_filter_supported(void)
+{
+	return this_cpu_has(X86_FEATURE_PAUSEFILTER);
+}
+
+static inline bool pause_threshold_supported(void)
+{
+	return this_cpu_has(X86_FEATURE_PFTHRESHOLD);
+}
+
+static inline void vmmcall(void)
+{
+	asm volatile ("vmmcall" : : : "memory");
+}
+
+static inline void stgi(void)
+{
+	asm volatile ("stgi");
+}
+
+static inline void clgi(void)
+{
+	asm volatile ("clgi");
+}
+
+
+#endif /* SRC_LIB_X86_SVM_LIB_H_ */
diff --git a/x86/svm.c b/x86/svm.c
index ba435b4a..e4e638c7 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -14,6 +14,7 @@
 #include "alloc_page.h"
 #include "isr.h"
 #include "apic.h"
+#include "svm_lib.h"
 
 /* for the nested page table*/
 u64 *pml4e;
@@ -54,32 +55,6 @@ bool default_supported(void)
 	return true;
 }
 
-bool vgif_supported(void)
-{
-	return this_cpu_has(X86_FEATURE_VGIF);
-}
-
-bool lbrv_supported(void)
-{
-	return this_cpu_has(X86_FEATURE_LBRV);
-}
-
-bool tsc_scale_supported(void)
-{
-	return this_cpu_has(X86_FEATURE_TSCRATEMSR);
-}
-
-bool pause_filter_supported(void)
-{
-	return this_cpu_has(X86_FEATURE_PAUSEFILTER);
-}
-
-bool pause_threshold_supported(void)
-{
-	return this_cpu_has(X86_FEATURE_PFTHRESHOLD);
-}
-
-
 void default_prepare(struct svm_test *test)
 {
 	vmcb_ident(vmcb);
@@ -94,10 +69,6 @@ bool default_finished(struct svm_test *test)
 	return true; /* one vmexit */
 }
 
-bool npt_supported(void)
-{
-	return this_cpu_has(X86_FEATURE_NPT);
-}
 
 int get_test_stage(struct svm_test *test)
 {
@@ -128,11 +99,6 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
 	seg->base = base;
 }
 
-inline void vmmcall(void)
-{
-	asm volatile ("vmmcall" : : : "memory");
-}
-
 static test_guest_func guest_main;
 
 void test_set_guest(test_guest_func func)
diff --git a/x86/svm.h b/x86/svm.h
index 73800bc7..075ac566 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -53,21 +53,14 @@ u64 *npt_get_pdpe(u64 address);
 u64 *npt_get_pml4e(void);
 bool smp_supported(void);
 bool default_supported(void);
-bool vgif_supported(void);
-bool lbrv_supported(void);
-bool tsc_scale_supported(void);
-bool pause_filter_supported(void);
-bool pause_threshold_supported(void);
 void default_prepare(struct svm_test *test);
 void default_prepare_gif_clear(struct svm_test *test);
 bool default_finished(struct svm_test *test);
-bool npt_supported(void);
 int get_test_stage(struct svm_test *test);
 void set_test_stage(struct svm_test *test, int s);
 void inc_test_stage(struct svm_test *test);
 void vmcb_ident(struct vmcb *vmcb);
 struct regs get_regs(void);
-void vmmcall(void);
 int __svm_vmrun(u64 rip);
 void __svm_bare_vmrun(void);
 int svm_vmrun(void);
@@ -76,17 +69,6 @@ u64* get_npt_pte(u64 *pml4, u64 guest_addr, int level);
 
 extern struct vmcb *vmcb;
 
-static inline void stgi(void)
-{
-    asm volatile ("stgi");
-}
-
-static inline void clgi(void)
-{
-    asm volatile ("clgi");
-}
-
-
 
 #define SAVE_GPR_C                              \
         "xchg %%rbx, regs+0x8\n\t"              \
diff --git a/x86/svm_npt.c b/x86/svm_npt.c
index b791f1ac..8aac0bb6 100644
--- a/x86/svm_npt.c
+++ b/x86/svm_npt.c
@@ -2,6 +2,7 @@
 #include "vm.h"
 #include "alloc_page.h"
 #include "vmalloc.h"
+#include "svm_lib.h"
 
 static void *scratch_page;
 
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index 2c29c2b0..bbf64af2 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -11,6 +11,7 @@
 #include "apic.h"
 #include "delay.h"
 #include "vmalloc.h"
+#include "svm_lib.h"
 
 #define SVM_EXIT_MAX_DR_INTERCEPT 0x3f
 
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [kvm-unit-tests PATCH 11/16] svm: add svm_supported
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (9 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 10/16] svm: move some svm support functions into lib/x86/svm_lib.h Maxim Levitsky
@ 2022-10-20 15:23 ` Maxim Levitsky
  2022-10-20 18:21   ` Sean Christopherson
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 12/16] svm: move setup_svm to svm_lib.c Maxim Levitsky
                   ` (4 subsequent siblings)
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:23 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini
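
For illustration only (nothing below is added by this patch), any test that
includes lib/x86/svm_lib.h can now gate itself on the new helper before it
touches SVM state, e.g.:

	if (!svm_supported()) {
		report_skip("SVM not available");
		return report_summary();
	}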

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/svm_lib.h | 5 +++++
 x86/svm.c         | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index 04910281..2d13b066 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -4,6 +4,11 @@
 #include <x86/svm.h>
 #include "processor.h"
 
+static inline bool svm_supported(void)
+{
+	return this_cpu_has(X86_FEATURE_SVM);
+}
+
 static inline bool npt_supported(void)
 {
 	return this_cpu_has(X86_FEATURE_NPT);
diff --git a/x86/svm.c b/x86/svm.c
index e4e638c7..43791546 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -343,7 +343,7 @@ int run_svm_tests(int ac, char **av, struct svm_test *svm_tests)
 	ac--;
 	av++;
 
-	if (!this_cpu_has(X86_FEATURE_SVM)) {
+	if (!svm_supported()) {
 		printf("SVM not available\n");
 		return report_summary();
 	}
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [kvm-unit-tests PATCH 12/16] svm: move setup_svm to svm_lib.c
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (10 preceding siblings ...)
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 11/16] svm: add svm_supported Maxim Levitsky
@ 2022-10-20 15:24 ` Maxim Levitsky
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 13/16] svm: move vmcb_ident " Maxim Levitsky
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:24 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini
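
For illustration only (nothing below is part of this patch), a test outside
x86/svm.c can now bring up SVM on its own and reach the shared bitmaps and
NPT root through the new accessors, e.g.:

	setup_svm();	/* sets EFER.SVME and MSR_VM_HSAVE_PA, builds the NPT */

	memset(svm_get_msr_bitmap(), 0, MSR_BITMAP_SIZE);
	memset(svm_get_io_bitmap(), 0, 8192);

	if (npt_supported())
		printf("NPT root at %p\n", (void *)npt_get_pml4e());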

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/svm.h       |   2 +
 lib/x86/svm_lib.c   | 105 ++++++++++++++++++++++++++++++++++++++++++++
 lib/x86/svm_lib.h   |  12 +++++
 x86/Makefile.x86_64 |   2 +
 x86/svm.c           |  90 ++-----------------------------------
 x86/svm.h           |   6 +--
 x86/svm_tests.c     |  18 +++++---
 7 files changed, 136 insertions(+), 99 deletions(-)
 create mode 100644 lib/x86/svm_lib.c

diff --git a/lib/x86/svm.h b/lib/x86/svm.h
index df57122e..106e15bf 100644
--- a/lib/x86/svm.h
+++ b/lib/x86/svm.h
@@ -2,6 +2,8 @@
 #ifndef SRC_LIB_X86_SVM_H_
 #define SRC_LIB_X86_SVM_H_
 
+#include "libcflat.h"
+
 enum {
 	INTERCEPT_INTR,
 	INTERCEPT_NMI,
diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
new file mode 100644
index 00000000..9e82e363
--- /dev/null
+++ b/lib/x86/svm_lib.c
@@ -0,0 +1,105 @@
+
+#include "svm_lib.h"
+#include "libcflat.h"
+#include "processor.h"
+#include "desc.h"
+#include "msr.h"
+#include "vm.h"
+#include "smp.h"
+#include "alloc_page.h"
+#include "fwcfg.h"
+
+/* for the nested page table*/
+static u64 *pml4e;
+
+static u8 *io_bitmap;
+static u8 io_bitmap_area[16384];
+
+static u8 *msr_bitmap;
+static u8 msr_bitmap_area[MSR_BITMAP_SIZE + PAGE_SIZE];
+
+
+u64 *npt_get_pte(u64 address)
+{
+	return get_pte(npt_get_pml4e(), (void*)address);
+}
+
+u64 *npt_get_pde(u64 address)
+{
+	struct pte_search search;
+	search = find_pte_level(npt_get_pml4e(), (void*)address, 2);
+	return search.pte;
+}
+
+u64 *npt_get_pdpe(u64 address)
+{
+	struct pte_search search;
+	search = find_pte_level(npt_get_pml4e(), (void*)address, 3);
+	return search.pte;
+}
+
+u64 *npt_get_pml4e(void)
+{
+	return pml4e;
+}
+
+u8* svm_get_msr_bitmap(void)
+{
+	return msr_bitmap;
+}
+
+u8* svm_get_io_bitmap(void)
+{
+	return io_bitmap;
+}
+
+static void set_additional_vcpu_msr(void *msr_efer)
+{
+	void *hsave = alloc_page();
+
+	wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave));
+	wrmsr(MSR_EFER, (ulong)msr_efer | EFER_SVME);
+}
+
+static void setup_npt(void)
+{
+	u64 size = fwcfg_get_u64(FW_CFG_RAM_SIZE);
+
+	/* Ensure all <4gb is mapped, e.g. if there's no RAM above 4gb. */
+	if (size < BIT_ULL(32))
+		size = BIT_ULL(32);
+
+	pml4e = alloc_page();
+
+	/* NPT accesses are treated as "user" accesses. */
+	__setup_mmu_range(pml4e, 0, size, X86_MMU_MAP_USER);
+}
+
+void setup_svm(void)
+{
+	void *hsave = alloc_page();
+	int i;
+
+	wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave));
+	wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SVME);
+
+	io_bitmap = (void *) ALIGN((ulong)io_bitmap_area, PAGE_SIZE);
+
+	msr_bitmap = (void *) ALIGN((ulong)msr_bitmap_area, PAGE_SIZE);
+
+	if (!npt_supported())
+		return;
+
+	for (i = 1; i < cpu_count(); i++)
+		on_cpu(i, (void *)set_additional_vcpu_msr, (void *)rdmsr(MSR_EFER));
+
+	printf("NPT detected - running all tests with NPT enabled\n");
+
+	/*
+	 * Nested paging supported - Build a nested page table
+	 * Build the page-table bottom-up and map everything with 4k
+	 * pages to get enough granularity for the NPT unit-tests.
+	 */
+
+	setup_npt();
+}
diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index 2d13b066..50664b24 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -54,5 +54,17 @@ static inline void clgi(void)
 	asm volatile ("clgi");
 }
 
+void setup_svm(void);
+
+u64 *npt_get_pte(u64 address);
+u64 *npt_get_pde(u64 address);
+u64 *npt_get_pdpe(u64 address);
+u64 *npt_get_pml4e(void);
+
+u8* svm_get_msr_bitmap(void);
+u8* svm_get_io_bitmap(void);
+
+#define MSR_BITMAP_SIZE 8192
+
 
 #endif /* SRC_LIB_X86_SVM_LIB_H_ */
diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
index 8ce53650..7e902551 100644
--- a/x86/Makefile.x86_64
+++ b/x86/Makefile.x86_64
@@ -19,6 +19,8 @@ COMMON_CFLAGS += -mno-red-zone -mno-sse -mno-sse2 $(fcf_protection_full)
 cflatobjs += lib/x86/setjmp64.o
 cflatobjs += lib/x86/intel-iommu.o
 cflatobjs += lib/x86/usermode.o
+cflatobjs += lib/x86/svm_lib.o
+
 
 tests = $(TEST_DIR)/apic.$(exe) \
 	  $(TEST_DIR)/emulator.$(exe) $(TEST_DIR)/idt_test.$(exe) \
diff --git a/x86/svm.c b/x86/svm.c
index 43791546..bf8caf54 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -16,35 +16,8 @@
 #include "apic.h"
 #include "svm_lib.h"
 
-/* for the nested page table*/
-u64 *pml4e;
-
 struct vmcb *vmcb;
 
-u64 *npt_get_pte(u64 address)
-{
-	return get_pte(npt_get_pml4e(), (void*)address);
-}
-
-u64 *npt_get_pde(u64 address)
-{
-	struct pte_search search;
-	search = find_pte_level(npt_get_pml4e(), (void*)address, 2);
-	return search.pte;
-}
-
-u64 *npt_get_pdpe(u64 address)
-{
-	struct pte_search search;
-	search = find_pte_level(npt_get_pml4e(), (void*)address, 3);
-	return search.pte;
-}
-
-u64 *npt_get_pml4e(void)
-{
-	return pml4e;
-}
-
 bool smp_supported(void)
 {
 	return cpu_count() > 1;
@@ -112,12 +85,6 @@ static void test_thunk(struct svm_test *test)
 	vmmcall();
 }
 
-u8 *io_bitmap;
-u8 io_bitmap_area[16384];
-
-u8 *msr_bitmap;
-u8 msr_bitmap_area[MSR_BITMAP_SIZE + PAGE_SIZE];
-
 void vmcb_ident(struct vmcb *vmcb)
 {
 	u64 vmcb_phys = virt_to_phys(vmcb);
@@ -153,12 +120,12 @@ void vmcb_ident(struct vmcb *vmcb)
 	ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
 		(1ULL << INTERCEPT_VMMCALL) |
 		(1ULL << INTERCEPT_SHUTDOWN);
-	ctrl->iopm_base_pa = virt_to_phys(io_bitmap);
-	ctrl->msrpm_base_pa = virt_to_phys(msr_bitmap);
+	ctrl->iopm_base_pa = virt_to_phys(svm_get_io_bitmap());
+	ctrl->msrpm_base_pa = virt_to_phys(svm_get_msr_bitmap());
 
 	if (npt_supported()) {
 		ctrl->nested_ctl = 1;
-		ctrl->nested_cr3 = (u64)pml4e;
+		ctrl->nested_cr3 = (u64)npt_get_pml4e();
 		ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
 	}
 }
@@ -247,57 +214,6 @@ static noinline void test_run(struct svm_test *test)
 		test->on_vcpu_done = true;
 }
 
-static void set_additional_vcpu_msr(void *msr_efer)
-{
-	void *hsave = alloc_page();
-
-	wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave));
-	wrmsr(MSR_EFER, (ulong)msr_efer | EFER_SVME);
-}
-
-static void setup_npt(void)
-{
-	u64 size = fwcfg_get_u64(FW_CFG_RAM_SIZE);
-
-	/* Ensure all <4gb is mapped, e.g. if there's no RAM above 4gb. */
-	if (size < BIT_ULL(32))
-		size = BIT_ULL(32);
-
-	pml4e = alloc_page();
-
-	/* NPT accesses are treated as "user" accesses. */
-	__setup_mmu_range(pml4e, 0, size, X86_MMU_MAP_USER);
-}
-
-static void setup_svm(void)
-{
-	void *hsave = alloc_page();
-	int i;
-
-	wrmsr(MSR_VM_HSAVE_PA, virt_to_phys(hsave));
-	wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SVME);
-
-	io_bitmap = (void *) ALIGN((ulong)io_bitmap_area, PAGE_SIZE);
-
-	msr_bitmap = (void *) ALIGN((ulong)msr_bitmap_area, PAGE_SIZE);
-
-	if (!npt_supported())
-		return;
-
-	for (i = 1; i < cpu_count(); i++)
-		on_cpu(i, (void *)set_additional_vcpu_msr, (void *)rdmsr(MSR_EFER));
-
-	printf("NPT detected - running all tests with NPT enabled\n");
-
-	/*
-	 * Nested paging supported - Build a nested page table
-	 * Build the page-table bottom-up and map everything with 4k
-	 * pages to get enough granularity for the NPT unit-tests.
-	 */
-
-	setup_npt();
-}
-
 int matched;
 
 static bool
diff --git a/x86/svm.h b/x86/svm.h
index 075ac566..f4343883 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -5,7 +5,6 @@
 #include <x86/svm.h>
 
 
-#define MSR_BITMAP_SIZE 8192
 #define LBR_CTL_ENABLE_MASK BIT_ULL(0)
 
 struct svm_test {
@@ -47,10 +46,7 @@ struct regs {
 typedef void (*test_guest_func)(struct svm_test *);
 
 int run_svm_tests(int ac, char **av, struct svm_test *svm_tests);
-u64 *npt_get_pte(u64 address);
-u64 *npt_get_pde(u64 address);
-u64 *npt_get_pdpe(u64 address);
-u64 *npt_get_pml4e(void);
+
 bool smp_supported(void);
 bool default_supported(void);
 void default_prepare(struct svm_test *test);
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index bbf64af2..57b5b572 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -306,14 +306,13 @@ static bool check_next_rip(struct svm_test *test)
 	return address == vmcb->control.next_rip;
 }
 
-extern u8 *msr_bitmap;
 
 static void prepare_msr_intercept(struct svm_test *test)
 {
 	default_prepare(test);
 	vmcb->control.intercept |= (1ULL << INTERCEPT_MSR_PROT);
 	vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR);
-	memset(msr_bitmap, 0xff, MSR_BITMAP_SIZE);
+	memset(svm_get_msr_bitmap(), 0xff, MSR_BITMAP_SIZE);
 }
 
 static void test_msr_intercept(struct svm_test *test)
@@ -424,7 +423,7 @@ static bool msr_intercept_finished(struct svm_test *test)
 
 static bool check_msr_intercept(struct svm_test *test)
 {
-	memset(msr_bitmap, 0, MSR_BITMAP_SIZE);
+	memset(svm_get_msr_bitmap(), 0, MSR_BITMAP_SIZE);
 	return (test->scratch == -2);
 }
 
@@ -536,10 +535,10 @@ static bool check_mode_switch(struct svm_test *test)
 	return test->scratch == 2;
 }
 
-extern u8 *io_bitmap;
-
 static void prepare_ioio(struct svm_test *test)
 {
+	u8 *io_bitmap = svm_get_io_bitmap();
+
 	vmcb->control.intercept |= (1ULL << INTERCEPT_IOIO_PROT);
 	test->scratch = 0;
 	memset(io_bitmap, 0, 8192);
@@ -548,6 +547,8 @@ static void prepare_ioio(struct svm_test *test)
 
 static void test_ioio(struct svm_test *test)
 {
+	u8 *io_bitmap = svm_get_io_bitmap();
+
 	// stage 0, test IO pass
 	inb(0x5000);
 	outb(0x0, 0x5000);
@@ -611,7 +612,6 @@ static void test_ioio(struct svm_test *test)
 		goto fail;
 
 	return;
-
 fail:
 	report_fail("stage %d", get_test_stage(test));
 	test->scratch = -1;
@@ -620,6 +620,7 @@ fail:
 static bool ioio_finished(struct svm_test *test)
 {
 	unsigned port, size;
+	u8 *io_bitmap = svm_get_io_bitmap();
 
 	/* Only expect IOIO intercepts */
 	if (vmcb->control.exit_code == SVM_EXIT_VMMCALL)
@@ -644,6 +645,8 @@ static bool ioio_finished(struct svm_test *test)
 
 static bool check_ioio(struct svm_test *test)
 {
+	u8 *io_bitmap = svm_get_io_bitmap();
+
 	memset(io_bitmap, 0, 8193);
 	return test->scratch != -1;
 }
@@ -2325,7 +2328,8 @@ static void test_msrpm_iopm_bitmap_addrs(void)
 {
 	u64 saved_intercept = vmcb->control.intercept;
 	u64 addr_beyond_limit = 1ull << cpuid_maxphyaddr();
-	u64 addr = virt_to_phys(msr_bitmap) & (~((1ull << 12) - 1));
+	u64 addr = virt_to_phys(svm_get_msr_bitmap()) & (~((1ull << 12) - 1));
+	u8 *io_bitmap = svm_get_io_bitmap();
 
 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_MSR_PROT,
 			 addr_beyond_limit - 2 * PAGE_SIZE, SVM_EXIT_ERR,
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [kvm-unit-tests PATCH 13/16] svm: move vmcb_ident to svm_lib.c
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (11 preceding siblings ...)
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 12/16] svm: move setup_svm to svm_lib.c Maxim Levitsky
@ 2022-10-20 15:24 ` Maxim Levitsky
  2022-10-20 18:37   ` Sean Christopherson
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 14/16] svm: rewrite vm entry macros Maxim Levitsky
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:24 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini
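
For illustration only (not part of this patch), a test can now set up its own
guest context entirely through the library, e.g. (guest_func is assumed to
exist):

	struct vmcb *my_vmcb = alloc_page();

	vmcb_ident(my_vmcb);			/* mirror the current host state */
	my_vmcb->save.rip = (ulong)guest_func;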

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/svm_lib.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/x86/svm_lib.h |  4 ++++
 x86/svm.c         | 54 -----------------------------------------------
 x86/svm.h         |  1 -
 4 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
index 9e82e363..2b067c65 100644
--- a/lib/x86/svm_lib.c
+++ b/lib/x86/svm_lib.c
@@ -103,3 +103,57 @@ void setup_svm(void)
 
 	setup_npt();
 }
+
+void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+			 u64 base, u32 limit, u32 attr)
+{
+	seg->selector = selector;
+	seg->attrib = attr;
+	seg->limit = limit;
+	seg->base = base;
+}
+
+void vmcb_ident(struct vmcb *vmcb)
+{
+	u64 vmcb_phys = virt_to_phys(vmcb);
+	struct vmcb_save_area *save = &vmcb->save;
+	struct vmcb_control_area *ctrl = &vmcb->control;
+	u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+		| SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
+	u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
+		| SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
+	struct descriptor_table_ptr desc_table_ptr;
+
+	memset(vmcb, 0, sizeof(*vmcb));
+	asm volatile ("vmsave %0" : : "a"(vmcb_phys) : "memory");
+	vmcb_set_seg(&save->es, read_es(), 0, -1U, data_seg_attr);
+	vmcb_set_seg(&save->cs, read_cs(), 0, -1U, code_seg_attr);
+	vmcb_set_seg(&save->ss, read_ss(), 0, -1U, data_seg_attr);
+	vmcb_set_seg(&save->ds, read_ds(), 0, -1U, data_seg_attr);
+	sgdt(&desc_table_ptr);
+	vmcb_set_seg(&save->gdtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0);
+	sidt(&desc_table_ptr);
+	vmcb_set_seg(&save->idtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0);
+	ctrl->asid = 1;
+	save->cpl = 0;
+	save->efer = rdmsr(MSR_EFER);
+	save->cr4 = read_cr4();
+	save->cr3 = read_cr3();
+	save->cr0 = read_cr0();
+	save->dr7 = read_dr7();
+	save->dr6 = read_dr6();
+	save->cr2 = read_cr2();
+	save->g_pat = rdmsr(MSR_IA32_CR_PAT);
+	save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
+	ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
+		(1ULL << INTERCEPT_VMMCALL) |
+		(1ULL << INTERCEPT_SHUTDOWN);
+	ctrl->iopm_base_pa = virt_to_phys(io_bitmap);
+	ctrl->msrpm_base_pa = virt_to_phys(msr_bitmap);
+
+	if (npt_supported()) {
+		ctrl->nested_ctl = 1;
+		ctrl->nested_cr3 = (u64)pml4e;
+		ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
+	}
+}
diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index 50664b24..27c3b137 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -54,7 +54,11 @@ static inline void clgi(void)
 	asm volatile ("clgi");
 }
 
+void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
+				  u64 base, u32 limit, u32 attr);
+
 void setup_svm(void);
+void vmcb_ident(struct vmcb *vmcb);
 
 u64 *npt_get_pte(u64 address);
 u64 *npt_get_pde(u64 address);
diff --git a/x86/svm.c b/x86/svm.c
index bf8caf54..37b4cd38 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -63,15 +63,6 @@ void inc_test_stage(struct svm_test *test)
 	barrier();
 }
 
-static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
-			 u64 base, u32 limit, u32 attr)
-{
-	seg->selector = selector;
-	seg->attrib = attr;
-	seg->limit = limit;
-	seg->base = base;
-}
-
 static test_guest_func guest_main;
 
 void test_set_guest(test_guest_func func)
@@ -85,51 +76,6 @@ static void test_thunk(struct svm_test *test)
 	vmmcall();
 }
 
-void vmcb_ident(struct vmcb *vmcb)
-{
-	u64 vmcb_phys = virt_to_phys(vmcb);
-	struct vmcb_save_area *save = &vmcb->save;
-	struct vmcb_control_area *ctrl = &vmcb->control;
-	u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
-		| SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
-	u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
-		| SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
-	struct descriptor_table_ptr desc_table_ptr;
-
-	memset(vmcb, 0, sizeof(*vmcb));
-	asm volatile ("vmsave %0" : : "a"(vmcb_phys) : "memory");
-	vmcb_set_seg(&save->es, read_es(), 0, -1U, data_seg_attr);
-	vmcb_set_seg(&save->cs, read_cs(), 0, -1U, code_seg_attr);
-	vmcb_set_seg(&save->ss, read_ss(), 0, -1U, data_seg_attr);
-	vmcb_set_seg(&save->ds, read_ds(), 0, -1U, data_seg_attr);
-	sgdt(&desc_table_ptr);
-	vmcb_set_seg(&save->gdtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0);
-	sidt(&desc_table_ptr);
-	vmcb_set_seg(&save->idtr, 0, desc_table_ptr.base, desc_table_ptr.limit, 0);
-	ctrl->asid = 1;
-	save->cpl = 0;
-	save->efer = rdmsr(MSR_EFER);
-	save->cr4 = read_cr4();
-	save->cr3 = read_cr3();
-	save->cr0 = read_cr0();
-	save->dr7 = read_dr7();
-	save->dr6 = read_dr6();
-	save->cr2 = read_cr2();
-	save->g_pat = rdmsr(MSR_IA32_CR_PAT);
-	save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
-	ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
-		(1ULL << INTERCEPT_VMMCALL) |
-		(1ULL << INTERCEPT_SHUTDOWN);
-	ctrl->iopm_base_pa = virt_to_phys(svm_get_io_bitmap());
-	ctrl->msrpm_base_pa = virt_to_phys(svm_get_msr_bitmap());
-
-	if (npt_supported()) {
-		ctrl->nested_ctl = 1;
-		ctrl->nested_cr3 = (u64)npt_get_pml4e();
-		ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
-	}
-}
-
 struct regs regs;
 
 struct regs get_regs(void)
diff --git a/x86/svm.h b/x86/svm.h
index f4343883..623f2b36 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -55,7 +55,6 @@ bool default_finished(struct svm_test *test);
 int get_test_stage(struct svm_test *test);
 void set_test_stage(struct svm_test *test, int s);
 void inc_test_stage(struct svm_test *test);
-void vmcb_ident(struct vmcb *vmcb);
 struct regs get_regs(void);
 int __svm_vmrun(u64 rip);
 void __svm_bare_vmrun(void);
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [kvm-unit-tests PATCH 14/16] svm: rewrite vm entry macros
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (12 preceding siblings ...)
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 13/16] svm: move vmcb_ident " Maxim Levitsky
@ 2022-10-20 15:24 ` Maxim Levitsky
  2022-10-20 18:55   ` Sean Christopherson
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 15/16] svm: introduce svm_vcpu Maxim Levitsky
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 16/16] add IPI loss stress test Maxim Levitsky
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:24 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini
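
For illustration only (nothing below is added by this patch), a caller of the
new macro supplies a struct svm_extra_regs that is swapped with the guest's
GPRs around VMRUN, so guest register state can be read back directly after the
exit, e.g. (guest_func, guest_stack and arg are assumed to exist):

	struct svm_extra_regs *regs = get_regs();

	vmcb->save.rip = (ulong)guest_func;
	vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack));
	regs->rdi = (ulong)arg;			/* first guest argument */

	SVM_VMRUN(vmcb, regs);			/* GPRs swapped with *regs */

	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
	       "guest exited via vmmcall, guest rcx=0x%lx", regs->rcx);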

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/svm_lib.h | 58 +++++++++++++++++++++++++++++++++++++++
 x86/svm.c         | 51 ++++++++++------------------------
 x86/svm.h         | 70 ++---------------------------------------------
 x86/svm_tests.c   | 24 ++++++++++------
 4 files changed, 91 insertions(+), 112 deletions(-)

diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index 27c3b137..59db26de 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -71,4 +71,62 @@ u8* svm_get_io_bitmap(void);
 #define MSR_BITMAP_SIZE 8192
 
 
+struct svm_extra_regs
+{
+    u64 rbx;
+    u64 rcx;
+    u64 rdx;
+    u64 rbp;
+    u64 rsi;
+    u64 rdi;
+    u64 r8;
+    u64 r9;
+    u64 r10;
+    u64 r11;
+    u64 r12;
+    u64 r13;
+    u64 r14;
+    u64 r15;
+};
+
+#define SWAP_GPRS(reg) \
+		"xchg %%rcx, 0x08(%%" reg ")\n\t"       \
+		"xchg %%rdx, 0x10(%%" reg ")\n\t"       \
+		"xchg %%rbp, 0x18(%%" reg ")\n\t"       \
+		"xchg %%rsi, 0x20(%%" reg ")\n\t"       \
+		"xchg %%rdi, 0x28(%%" reg ")\n\t"       \
+		"xchg %%r8,  0x30(%%" reg ")\n\t"       \
+		"xchg %%r9,  0x38(%%" reg ")\n\t"       \
+		"xchg %%r10, 0x40(%%" reg ")\n\t"       \
+		"xchg %%r11, 0x48(%%" reg ")\n\t"       \
+		"xchg %%r12, 0x50(%%" reg ")\n\t"       \
+		"xchg %%r13, 0x58(%%" reg ")\n\t"       \
+		"xchg %%r14, 0x60(%%" reg ")\n\t"       \
+		"xchg %%r15, 0x68(%%" reg ")\n\t"       \
+		\
+		"xchg %%rbx, 0x00(%%" reg ")\n\t"       \
+
+
+#define __SVM_VMRUN(vmcb, regs, label)          \
+		asm volatile (                          \
+			"vmload %%rax\n\t"                  \
+			"push %%rax\n\t"                    \
+			"push %%rbx\n\t"                    \
+			SWAP_GPRS("rbx")                    \
+			".global " label "\n\t"             \
+			label ": vmrun %%rax\n\t"           \
+			"vmsave %%rax\n\t"                  \
+			"pop %%rax\n\t"                     \
+			SWAP_GPRS("rax")                    \
+			"pop %%rax\n\t"                     \
+			:                                   \
+			: "a" (virt_to_phys(vmcb)),         \
+			  "b"(regs)                         \
+			/* clobbers*/                       \
+			: "memory"                          \
+		);
+
+#define SVM_VMRUN(vmcb, regs) \
+		__SVM_VMRUN(vmcb, regs, "vmrun_dummy_label_%=")
+
 #endif /* SRC_LIB_X86_SVM_LIB_H_ */
diff --git a/x86/svm.c b/x86/svm.c
index 37b4cd38..9484a6d1 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -76,11 +76,11 @@ static void test_thunk(struct svm_test *test)
 	vmmcall();
 }
 
-struct regs regs;
+struct svm_extra_regs regs;
 
-struct regs get_regs(void)
+struct svm_extra_regs* get_regs(void)
 {
-	return regs;
+	return &regs;
 }
 
 // rax handled specially below
@@ -97,13 +97,7 @@ int __svm_vmrun(u64 rip)
 	vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack));
 	regs.rdi = (ulong)v2_test;
 
-	asm volatile (
-		      ASM_PRE_VMRUN_CMD
-		      "vmrun %%rax\n\t"               \
-		      ASM_POST_VMRUN_CMD
-		      :
-		      : "a" (virt_to_phys(vmcb))
-		      : "memory", "r15");
+	SVM_VMRUN(vmcb, &regs);
 
 	return (vmcb->control.exit_code);
 }
@@ -113,12 +107,8 @@ int svm_vmrun(void)
 	return __svm_vmrun((u64)test_thunk);
 }
 
-extern u8 vmrun_rip;
-
 static noinline void test_run(struct svm_test *test)
 {
-	u64 vmcb_phys = virt_to_phys(vmcb);
-
 	irq_disable();
 	vmcb_ident(vmcb);
 
@@ -128,28 +118,17 @@ static noinline void test_run(struct svm_test *test)
 	vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack));
 	regs.rdi = (ulong)test;
 	do {
-		struct svm_test *the_test = test;
-		u64 the_vmcb = vmcb_phys;
-		asm volatile (
-			      "clgi;\n\t" // semi-colon needed for LLVM compatibility
-			      "sti \n\t"
-			      "call *%c[PREPARE_GIF_CLEAR](%[test]) \n \t"
-			      "mov %[vmcb_phys], %%rax \n\t"
-			      ASM_PRE_VMRUN_CMD
-			      ".global vmrun_rip\n\t"		\
-			      "vmrun_rip: vmrun %%rax\n\t"    \
-			      ASM_POST_VMRUN_CMD
-			      "cli \n\t"
-			      "stgi"
-			      : // inputs clobbered by the guest:
-				"=D" (the_test),            // first argument register
-				"=b" (the_vmcb)             // callee save register!
-			      : [test] "0" (the_test),
-				[vmcb_phys] "1"(the_vmcb),
-				[PREPARE_GIF_CLEAR] "i" (offsetof(struct svm_test, prepare_gif_clear))
-			      : "rax", "rcx", "rdx", "rsi",
-				"r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15",
-				"memory");
+
+		clgi();
+		sti();
+
+		test->prepare_gif_clear(test);
+
+		__SVM_VMRUN(vmcb, &regs, "vmrun_rip");
+
+		cli();
+		stgi();
+
 		++test->exits;
 	} while (!test->finished(test));
 	irq_enable();
diff --git a/x86/svm.h b/x86/svm.h
index 623f2b36..8d4515f0 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -23,26 +23,6 @@ struct svm_test {
 	bool on_vcpu_done;
 };
 
-struct regs {
-	u64 rax;
-	u64 rbx;
-	u64 rcx;
-	u64 rdx;
-	u64 cr2;
-	u64 rbp;
-	u64 rsi;
-	u64 rdi;
-	u64 r8;
-	u64 r9;
-	u64 r10;
-	u64 r11;
-	u64 r12;
-	u64 r13;
-	u64 r14;
-	u64 r15;
-	u64 rflags;
-};
-
 typedef void (*test_guest_func)(struct svm_test *);
 
 int run_svm_tests(int ac, char **av, struct svm_test *svm_tests);
@@ -55,7 +35,7 @@ bool default_finished(struct svm_test *test);
 int get_test_stage(struct svm_test *test);
 void set_test_stage(struct svm_test *test, int s);
 void inc_test_stage(struct svm_test *test);
-struct regs get_regs(void);
+struct svm_extra_regs * get_regs(void);
 int __svm_vmrun(u64 rip);
 void __svm_bare_vmrun(void);
 int svm_vmrun(void);
@@ -63,51 +43,5 @@ void test_set_guest(test_guest_func func);
 u64* get_npt_pte(u64 *pml4, u64 guest_addr, int level);
 
 extern struct vmcb *vmcb;
-
-
-#define SAVE_GPR_C                              \
-        "xchg %%rbx, regs+0x8\n\t"              \
-        "xchg %%rcx, regs+0x10\n\t"             \
-        "xchg %%rdx, regs+0x18\n\t"             \
-        "xchg %%rbp, regs+0x28\n\t"             \
-        "xchg %%rsi, regs+0x30\n\t"             \
-        "xchg %%rdi, regs+0x38\n\t"             \
-        "xchg %%r8, regs+0x40\n\t"              \
-        "xchg %%r9, regs+0x48\n\t"              \
-        "xchg %%r10, regs+0x50\n\t"             \
-        "xchg %%r11, regs+0x58\n\t"             \
-        "xchg %%r12, regs+0x60\n\t"             \
-        "xchg %%r13, regs+0x68\n\t"             \
-        "xchg %%r14, regs+0x70\n\t"             \
-        "xchg %%r15, regs+0x78\n\t"
-
-#define LOAD_GPR_C      SAVE_GPR_C
-
-#define ASM_PRE_VMRUN_CMD                       \
-                "vmload %%rax\n\t"              \
-                "mov regs+0x80, %%r15\n\t"      \
-                "mov %%r15, 0x170(%%rax)\n\t"   \
-                "mov regs, %%r15\n\t"           \
-                "mov %%r15, 0x1f8(%%rax)\n\t"   \
-                LOAD_GPR_C                      \
-
-#define ASM_POST_VMRUN_CMD                      \
-                SAVE_GPR_C                      \
-                "mov 0x170(%%rax), %%r15\n\t"   \
-                "mov %%r15, regs+0x80\n\t"      \
-                "mov 0x1f8(%%rax), %%r15\n\t"   \
-                "mov %%r15, regs\n\t"           \
-                "vmsave %%rax\n\t"              \
-
-
-
-#define SVM_BARE_VMRUN \
-	asm volatile ( \
-		ASM_PRE_VMRUN_CMD \
-                "vmrun %%rax\n\t"               \
-		ASM_POST_VMRUN_CMD \
-		: \
-		: "a" (virt_to_phys(vmcb)) \
-		: "memory", "r15") \
-
+extern struct svm_test svm_tests[];
 #endif
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index 57b5b572..475a40d0 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -398,7 +398,7 @@ static bool msr_intercept_finished(struct svm_test *test)
 		 * RCX holds the MSR index.
 		 */
 		printf("%s 0x%lx #GP exception\n",
-		       exit_info_1 ? "WRMSR" : "RDMSR", get_regs().rcx);
+		       exit_info_1 ? "WRMSR" : "RDMSR", get_regs()->rcx);
 	}
 
 	/* Jump over RDMSR/WRMSR instruction */
@@ -414,9 +414,9 @@ static bool msr_intercept_finished(struct svm_test *test)
 	 */
 	if (exit_info_1)
 		test->scratch =
-			((get_regs().rdx << 32) | (vmcb->save.rax & 0xffffffff));
+			((get_regs()->rdx << 32) | (vmcb->save.rax & 0xffffffff));
 	else
-		test->scratch = get_regs().rcx;
+		test->scratch = get_regs()->rcx;
 
 	return false;
 }
@@ -1851,7 +1851,7 @@ static volatile bool host_rflags_set_tf = false;
 static volatile bool host_rflags_set_rf = false;
 static u64 rip_detected;
 
-extern u64 *vmrun_rip;
+extern u64 vmrun_rip;
 
 static void host_rflags_db_handler(struct ex_regs *r)
 {
@@ -2989,6 +2989,8 @@ static void svm_lbrv_test0(void)
 
 static void svm_lbrv_test1(void)
 {
+	struct svm_extra_regs* regs = get_regs();
+
 	report(true, "Test that without LBRV enabled, guest LBR state does 'leak' to the host(1)");
 
 	vmcb->save.rip = (ulong)svm_lbrv_test_guest1;
@@ -2996,7 +2998,7 @@ static void svm_lbrv_test1(void)
 
 	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
 	DO_BRANCH(host_branch1);
-	SVM_BARE_VMRUN;
+	SVM_VMRUN(vmcb,regs);
 	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 
 	if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
@@ -3011,6 +3013,8 @@ static void svm_lbrv_test1(void)
 
 static void svm_lbrv_test2(void)
 {
+	struct svm_extra_regs* regs = get_regs();
+
 	report(true, "Test that without LBRV enabled, guest LBR state does 'leak' to the host(2)");
 
 	vmcb->save.rip = (ulong)svm_lbrv_test_guest2;
@@ -3019,7 +3023,7 @@ static void svm_lbrv_test2(void)
 	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
 	DO_BRANCH(host_branch2);
 	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
-	SVM_BARE_VMRUN;
+	SVM_VMRUN(vmcb,regs);
 	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
 
@@ -3035,6 +3039,8 @@ static void svm_lbrv_test2(void)
 
 static void svm_lbrv_nested_test1(void)
 {
+	struct svm_extra_regs* regs = get_regs();
+
 	if (!lbrv_supported()) {
 		report_skip("LBRV not supported in the guest");
 		return;
@@ -3047,7 +3053,7 @@ static void svm_lbrv_nested_test1(void)
 
 	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
 	DO_BRANCH(host_branch3);
-	SVM_BARE_VMRUN;
+	SVM_VMRUN(vmcb,regs);
 	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
 
@@ -3068,6 +3074,8 @@ static void svm_lbrv_nested_test1(void)
 
 static void svm_lbrv_nested_test2(void)
 {
+	struct svm_extra_regs* regs = get_regs();
+
 	if (!lbrv_supported()) {
 		report_skip("LBRV not supported in the guest");
 		return;
@@ -3083,7 +3091,7 @@ static void svm_lbrv_nested_test2(void)
 
 	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
 	DO_BRANCH(host_branch4);
-	SVM_BARE_VMRUN;
+	SVM_VMRUN(vmcb,regs);
 	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
 
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [kvm-unit-tests PATCH 15/16] svm: introduce svm_vcpu
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (13 preceding siblings ...)
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 14/16] svm: rewrite vm entry macros Maxim Levitsky
@ 2022-10-20 15:24 ` Maxim Levitsky
  2022-10-20 19:02   ` Sean Christopherson
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 16/16] add IPI loss stress test Maxim Levitsky
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:24 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

This adds the minimum amount of code needed to support tests that run
SVM on more than one vCPU.
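
For illustration only (no such test is added in this patch), a second vCPU can
now run its own guest without touching vcpu0, assuming a guest_func exists and
that setup_svm() has already enabled SVME on all CPUs:

	static struct svm_vcpu vcpu1;

	static void secondary_vcpu_main(void *data)
	{
		svm_vcpu_init(&vcpu1);
		vcpu1.vmcb->save.rip = (ulong)guest_func;
		SVM_VMRUN(vcpu1.vmcb, &vcpu1.regs);
		report(vcpu1.vmcb->control.exit_code == SVM_EXIT_VMMCALL,
		       "guest ran to completion on vCPU 1");
	}

	/* from the test's main flow: on_cpu(1, secondary_vcpu_main, NULL); */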

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 lib/x86/svm_lib.c |   9 +
 lib/x86/svm_lib.h |  10 +
 x86/svm.c         |  37 ++-
 x86/svm.h         |   5 +-
 x86/svm_npt.c     |  44 ++--
 x86/svm_tests.c   | 615 +++++++++++++++++++++++-----------------------
 6 files changed, 362 insertions(+), 358 deletions(-)

diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
index 2b067c65..1152c497 100644
--- a/lib/x86/svm_lib.c
+++ b/lib/x86/svm_lib.c
@@ -157,3 +157,12 @@ void vmcb_ident(struct vmcb *vmcb)
 		ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
 	}
 }
+
+void svm_vcpu_init(struct svm_vcpu *vcpu)
+{
+	vcpu->vmcb = alloc_page();
+	vmcb_ident(vcpu->vmcb);
+	memset(&vcpu->regs, 0, sizeof(vcpu->regs));
+	vcpu->stack = alloc_pages(4) + (PAGE_SIZE << 4);
+	vcpu->vmcb->save.rsp = (ulong)(vcpu->stack);
+}
diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
index 59db26de..c6957dba 100644
--- a/lib/x86/svm_lib.h
+++ b/lib/x86/svm_lib.h
@@ -89,6 +89,16 @@ struct svm_extra_regs
     u64 r15;
 };
 
+
+struct svm_vcpu
+{
+	struct vmcb *vmcb;
+	struct svm_extra_regs regs;
+	void *stack;
+};
+
+void svm_vcpu_init(struct svm_vcpu *vcpu);
+
 #define SWAP_GPRS(reg) \
 		"xchg %%rcx, 0x08(%%" reg ")\n\t"       \
 		"xchg %%rdx, 0x10(%%" reg ")\n\t"       \
diff --git a/x86/svm.c b/x86/svm.c
index 9484a6d1..7aa3ebd2 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -16,7 +16,7 @@
 #include "apic.h"
 #include "svm_lib.h"
 
-struct vmcb *vmcb;
+struct svm_vcpu vcpu0;
 
 bool smp_supported(void)
 {
@@ -30,7 +30,7 @@ bool default_supported(void)
 
 void default_prepare(struct svm_test *test)
 {
-	vmcb_ident(vmcb);
+	vmcb_ident(vcpu0.vmcb);
 }
 
 void default_prepare_gif_clear(struct svm_test *test)
@@ -76,30 +76,21 @@ static void test_thunk(struct svm_test *test)
 	vmmcall();
 }
 
-struct svm_extra_regs regs;
-
-struct svm_extra_regs* get_regs(void)
-{
-	return &regs;
-}
-
 // rax handled specially below
 
 
 struct svm_test *v2_test;
 
 
-u64 guest_stack[10000];
-
 int __svm_vmrun(u64 rip)
 {
-	vmcb->save.rip = (ulong)rip;
-	vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack));
-	regs.rdi = (ulong)v2_test;
+	vcpu0.vmcb->save.rip = (ulong)rip;
+	vcpu0.vmcb->save.rsp = (ulong)(vcpu0.stack);
+	vcpu0.regs.rdi = (ulong)v2_test;
 
-	SVM_VMRUN(vmcb, &regs);
+	SVM_VMRUN(vcpu0.vmcb, &vcpu0.regs);
 
-	return (vmcb->control.exit_code);
+	return (vcpu0.vmcb->control.exit_code);
 }
 
 int svm_vmrun(void)
@@ -110,13 +101,13 @@ int svm_vmrun(void)
 static noinline void test_run(struct svm_test *test)
 {
 	irq_disable();
-	vmcb_ident(vmcb);
+	vmcb_ident(vcpu0.vmcb);
 
 	test->prepare(test);
 	guest_main = test->guest_func;
-	vmcb->save.rip = (ulong)test_thunk;
-	vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack));
-	regs.rdi = (ulong)test;
+	vcpu0.vmcb->save.rip = (ulong)test_thunk;
+	vcpu0.vmcb->save.rsp = (ulong)(vcpu0.stack);
+	vcpu0.regs.rdi = (ulong)test;
 	do {
 
 		clgi();
@@ -124,7 +115,7 @@ static noinline void test_run(struct svm_test *test)
 
 		test->prepare_gif_clear(test);
 
-		__SVM_VMRUN(vmcb, &regs, "vmrun_rip");
+		__SVM_VMRUN(vcpu0.vmcb, &vcpu0.regs, "vmrun_rip");
 
 		cli();
 		stgi();
@@ -191,7 +182,7 @@ int run_svm_tests(int ac, char **av, struct svm_test *svm_tests)
 
 	setup_svm();
 
-	vmcb = alloc_page();
+	svm_vcpu_init(&vcpu0);
 
 	for (; svm_tests[i].name != NULL; i++) {
 		if (!test_wanted(svm_tests[i].name, av, ac))
@@ -209,7 +200,7 @@ int run_svm_tests(int ac, char **av, struct svm_test *svm_tests)
 			else
 				test_run(&svm_tests[i]);
 		} else {
-			vmcb_ident(vmcb);
+			vmcb_ident(vcpu0.vmcb);
 			v2_test = &(svm_tests[i]);
 			svm_tests[i].v2();
 		}
diff --git a/x86/svm.h b/x86/svm.h
index 8d4515f0..0c40a086 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -35,13 +35,14 @@ bool default_finished(struct svm_test *test);
 int get_test_stage(struct svm_test *test);
 void set_test_stage(struct svm_test *test, int s);
 void inc_test_stage(struct svm_test *test);
-struct svm_extra_regs * get_regs(void);
 int __svm_vmrun(u64 rip);
 void __svm_bare_vmrun(void);
 int svm_vmrun(void);
 void test_set_guest(test_guest_func func);
 u64* get_npt_pte(u64 *pml4, u64 guest_addr, int level);
 
-extern struct vmcb *vmcb;
+
 extern struct svm_test svm_tests[];
+extern struct svm_vcpu vcpu0;
+
 #endif
diff --git a/x86/svm_npt.c b/x86/svm_npt.c
index 8aac0bb6..53a82793 100644
--- a/x86/svm_npt.c
+++ b/x86/svm_npt.c
@@ -31,8 +31,8 @@ static bool npt_np_check(struct svm_test *test)
 
 	*pte |= 1ULL;
 
-	return (vmcb->control.exit_code == SVM_EXIT_NPF)
-	    && (vmcb->control.exit_info_1 == 0x100000004ULL);
+	return (vcpu0.vmcb->control.exit_code == SVM_EXIT_NPF)
+	    && (vcpu0.vmcb->control.exit_info_1 == 0x100000004ULL);
 }
 
 static void npt_nx_prepare(struct svm_test *test)
@@ -43,7 +43,7 @@ static void npt_nx_prepare(struct svm_test *test)
 	wrmsr(MSR_EFER, test->scratch | EFER_NX);
 
 	/* Clear the guest's EFER.NX, it should not affect NPT behavior. */
-	vmcb->save.efer &= ~EFER_NX;
+	vcpu0.vmcb->save.efer &= ~EFER_NX;
 
 	pte = npt_get_pte((u64) null_test);
 
@@ -58,8 +58,8 @@ static bool npt_nx_check(struct svm_test *test)
 
 	*pte &= ~PT64_NX_MASK;
 
-	return (vmcb->control.exit_code == SVM_EXIT_NPF)
-	    && (vmcb->control.exit_info_1 == 0x100000015ULL);
+	return (vcpu0.vmcb->control.exit_code == SVM_EXIT_NPF)
+	    && (vcpu0.vmcb->control.exit_info_1 == 0x100000015ULL);
 }
 
 static void npt_us_prepare(struct svm_test *test)
@@ -83,8 +83,8 @@ static bool npt_us_check(struct svm_test *test)
 
 	*pte |= (1ULL << 2);
 
-	return (vmcb->control.exit_code == SVM_EXIT_NPF)
-	    && (vmcb->control.exit_info_1 == 0x100000005ULL);
+	return (vcpu0.vmcb->control.exit_code == SVM_EXIT_NPF)
+	    && (vcpu0.vmcb->control.exit_info_1 == 0x100000005ULL);
 }
 
 static void npt_rw_prepare(struct svm_test *test)
@@ -110,8 +110,8 @@ static bool npt_rw_check(struct svm_test *test)
 
 	*pte |= (1ULL << 1);
 
-	return (vmcb->control.exit_code == SVM_EXIT_NPF)
-	    && (vmcb->control.exit_info_1 == 0x100000007ULL);
+	return (vcpu0.vmcb->control.exit_code == SVM_EXIT_NPF)
+	    && (vcpu0.vmcb->control.exit_info_1 == 0x100000007ULL);
 }
 
 static void npt_rw_pfwalk_prepare(struct svm_test *test)
@@ -130,9 +130,9 @@ static bool npt_rw_pfwalk_check(struct svm_test *test)
 
 	*pte |= (1ULL << 1);
 
-	return (vmcb->control.exit_code == SVM_EXIT_NPF)
-	    && (vmcb->control.exit_info_1 == 0x200000007ULL)
-	    && (vmcb->control.exit_info_2 == read_cr3());
+	return (vcpu0.vmcb->control.exit_code == SVM_EXIT_NPF)
+	    && (vcpu0.vmcb->control.exit_info_1 == 0x200000007ULL)
+	    && (vcpu0.vmcb->control.exit_info_2 == read_cr3());
 }
 
 static void npt_l1mmio_prepare(struct svm_test *test)
@@ -181,8 +181,8 @@ static bool npt_rw_l1mmio_check(struct svm_test *test)
 
 	*pte |= (1ULL << 1);
 
-	return (vmcb->control.exit_code == SVM_EXIT_NPF)
-	    && (vmcb->control.exit_info_1 == 0x100000007ULL);
+	return (vcpu0.vmcb->control.exit_code == SVM_EXIT_NPF)
+	    && (vcpu0.vmcb->control.exit_info_1 == 0x100000007ULL);
 }
 
 static void basic_guest_main(struct svm_test *test)
@@ -199,8 +199,8 @@ static void __svm_npt_rsvd_bits_test(u64 * pxe, u64 rsvd_bits, u64 efer,
 	wrmsr(MSR_EFER, efer);
 	write_cr4(cr4);
 
-	vmcb->save.efer = guest_efer;
-	vmcb->save.cr4 = guest_cr4;
+	vcpu0.vmcb->save.efer = guest_efer;
+	vcpu0.vmcb->save.cr4 = guest_cr4;
 
 	*pxe |= rsvd_bits;
 
@@ -226,10 +226,10 @@ static void __svm_npt_rsvd_bits_test(u64 * pxe, u64 rsvd_bits, u64 efer,
 
 	}
 
-	report(vmcb->control.exit_info_1 == pfec,
+	report(vcpu0.vmcb->control.exit_info_1 == pfec,
 	       "Wanted PFEC = 0x%lx, got PFEC = %lx, PxE = 0x%lx.  "
 	       "host.NX = %u, host.SMEP = %u, guest.NX = %u, guest.SMEP = %u",
-	       pfec, vmcb->control.exit_info_1, *pxe,
+	       pfec, vcpu0.vmcb->control.exit_info_1, *pxe,
 	       !!(efer & EFER_NX), !!(cr4 & X86_CR4_SMEP),
 	       !!(guest_efer & EFER_NX), !!(guest_cr4 & X86_CR4_SMEP));
 
@@ -317,8 +317,8 @@ static void svm_npt_rsvd_bits_test(void)
 
 	saved_efer = host_efer = rdmsr(MSR_EFER);
 	saved_cr4 = host_cr4 = read_cr4();
-	sg_efer = guest_efer = vmcb->save.efer;
-	sg_cr4 = guest_cr4 = vmcb->save.cr4;
+	sg_efer = guest_efer = vcpu0.vmcb->save.efer;
+	sg_cr4 = guest_cr4 = vcpu0.vmcb->save.cr4;
 
 	test_set_guest(basic_guest_main);
 
@@ -350,8 +350,8 @@ skip_pte_test:
 
 	wrmsr(MSR_EFER, saved_efer);
 	write_cr4(saved_cr4);
-	vmcb->save.efer = sg_efer;
-	vmcb->save.cr4 = sg_cr4;
+	vcpu0.vmcb->save.efer = sg_efer;
+	vcpu0.vmcb->save.cr4 = sg_cr4;
 }
 
 #define NPT_V1_TEST(name, prepare, guest_code, check)				\
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index 475a40d0..8c49718d 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -43,34 +43,34 @@ static void null_test(struct svm_test *test)
 
 static bool null_check(struct svm_test *test)
 {
-	return vmcb->control.exit_code == SVM_EXIT_VMMCALL;
+	return vcpu0.vmcb->control.exit_code == SVM_EXIT_VMMCALL;
 }
 
 static void prepare_no_vmrun_int(struct svm_test *test)
 {
-	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMRUN);
+	vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMRUN);
 }
 
 static bool check_no_vmrun_int(struct svm_test *test)
 {
-	return vmcb->control.exit_code == SVM_EXIT_ERR;
+	return vcpu0.vmcb->control.exit_code == SVM_EXIT_ERR;
 }
 
 static void test_vmrun(struct svm_test *test)
 {
-	asm volatile ("vmrun %0" : : "a"(virt_to_phys(vmcb)));
+	asm volatile ("vmrun %0" : : "a"(virt_to_phys(vcpu0.vmcb)));
 }
 
 static bool check_vmrun(struct svm_test *test)
 {
-	return vmcb->control.exit_code == SVM_EXIT_VMRUN;
+	return vcpu0.vmcb->control.exit_code == SVM_EXIT_VMRUN;
 }
 
 static void prepare_rsm_intercept(struct svm_test *test)
 {
 	default_prepare(test);
-	vmcb->control.intercept |= 1 << INTERCEPT_RSM;
-	vmcb->control.intercept_exceptions |= (1ULL << UD_VECTOR);
+	vcpu0.vmcb->control.intercept |= 1 << INTERCEPT_RSM;
+	vcpu0.vmcb->control.intercept_exceptions |= (1ULL << UD_VECTOR);
 }
 
 static void test_rsm_intercept(struct svm_test *test)
@@ -87,22 +87,22 @@ static bool finished_rsm_intercept(struct svm_test *test)
 {
 	switch (get_test_stage(test)) {
 	case 0:
-		if (vmcb->control.exit_code != SVM_EXIT_RSM) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_RSM) {
 			report_fail("VMEXIT not due to rsm. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->control.intercept &= ~(1 << INTERCEPT_RSM);
+		vcpu0.vmcb->control.intercept &= ~(1 << INTERCEPT_RSM);
 		inc_test_stage(test);
 		break;
 
 	case 1:
-		if (vmcb->control.exit_code != SVM_EXIT_EXCP_BASE + UD_VECTOR) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_EXCP_BASE + UD_VECTOR) {
 			report_fail("VMEXIT not due to #UD. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->save.rip += 2;
+		vcpu0.vmcb->save.rip += 2;
 		inc_test_stage(test);
 		break;
 
@@ -115,7 +115,7 @@ static bool finished_rsm_intercept(struct svm_test *test)
 static void prepare_cr3_intercept(struct svm_test *test)
 {
 	default_prepare(test);
-	vmcb->control.intercept_cr_read |= 1 << 3;
+	vcpu0.vmcb->control.intercept_cr_read |= 1 << 3;
 }
 
 static void test_cr3_intercept(struct svm_test *test)
@@ -125,7 +125,7 @@ static void test_cr3_intercept(struct svm_test *test)
 
 static bool check_cr3_intercept(struct svm_test *test)
 {
-	return vmcb->control.exit_code == SVM_EXIT_READ_CR3;
+	return vcpu0.vmcb->control.exit_code == SVM_EXIT_READ_CR3;
 }
 
 static bool check_cr3_nointercept(struct svm_test *test)
@@ -149,7 +149,7 @@ static void corrupt_cr3_intercept_bypass(void *_test)
 static void prepare_cr3_intercept_bypass(struct svm_test *test)
 {
 	default_prepare(test);
-	vmcb->control.intercept_cr_read |= 1 << 3;
+	vcpu0.vmcb->control.intercept_cr_read |= 1 << 3;
 	on_cpu_async(1, corrupt_cr3_intercept_bypass, test);
 }
 
@@ -169,8 +169,8 @@ static void test_cr3_intercept_bypass(struct svm_test *test)
 static void prepare_dr_intercept(struct svm_test *test)
 {
 	default_prepare(test);
-	vmcb->control.intercept_dr_read = 0xff;
-	vmcb->control.intercept_dr_write = 0xff;
+	vcpu0.vmcb->control.intercept_dr_read = 0xff;
+	vcpu0.vmcb->control.intercept_dr_write = 0xff;
 }
 
 static void test_dr_intercept(struct svm_test *test)
@@ -254,7 +254,7 @@ static void test_dr_intercept(struct svm_test *test)
 
 static bool dr_intercept_finished(struct svm_test *test)
 {
-	ulong n = (vmcb->control.exit_code - SVM_EXIT_READ_DR0);
+	ulong n = (vcpu0.vmcb->control.exit_code - SVM_EXIT_READ_DR0);
 
 	/* Only expect DR intercepts */
 	if (n > (SVM_EXIT_MAX_DR_INTERCEPT - SVM_EXIT_READ_DR0))
@@ -270,7 +270,7 @@ static bool dr_intercept_finished(struct svm_test *test)
 	test->scratch = (n % 16);
 
 	/* Jump over MOV instruction */
-	vmcb->save.rip += 3;
+	vcpu0.vmcb->save.rip += 3;
 
 	return false;
 }
@@ -287,7 +287,7 @@ static bool next_rip_supported(void)
 
 static void prepare_next_rip(struct svm_test *test)
 {
-	vmcb->control.intercept |= (1ULL << INTERCEPT_RDTSC);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_RDTSC);
 }
 
 
@@ -303,15 +303,15 @@ static bool check_next_rip(struct svm_test *test)
 	extern char exp_next_rip;
 	unsigned long address = (unsigned long)&exp_next_rip;
 
-	return address == vmcb->control.next_rip;
+	return address == vcpu0.vmcb->control.next_rip;
 }
 
 
 static void prepare_msr_intercept(struct svm_test *test)
 {
 	default_prepare(test);
-	vmcb->control.intercept |= (1ULL << INTERCEPT_MSR_PROT);
-	vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_MSR_PROT);
+	vcpu0.vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR);
 	memset(svm_get_msr_bitmap(), 0xff, MSR_BITMAP_SIZE);
 }
 
@@ -363,12 +363,12 @@ static void test_msr_intercept(struct svm_test *test)
 
 static bool msr_intercept_finished(struct svm_test *test)
 {
-	u32 exit_code = vmcb->control.exit_code;
+	u32 exit_code = vcpu0.vmcb->control.exit_code;
 	u64 exit_info_1;
 	u8 *opcode;
 
 	if (exit_code == SVM_EXIT_MSR) {
-		exit_info_1 = vmcb->control.exit_info_1;
+		exit_info_1 = vcpu0.vmcb->control.exit_info_1;
 	} else {
 		/*
 		 * If #GP exception occurs instead, check that it was
@@ -378,7 +378,7 @@ static bool msr_intercept_finished(struct svm_test *test)
 		if (exit_code != (SVM_EXIT_EXCP_BASE + GP_VECTOR))
 			return true;
 
-		opcode = (u8 *)vmcb->save.rip;
+		opcode = (u8 *)vcpu0.vmcb->save.rip;
 		if (opcode[0] != 0x0f)
 			return true;
 
@@ -398,11 +398,11 @@ static bool msr_intercept_finished(struct svm_test *test)
 		 * RCX holds the MSR index.
 		 */
 		printf("%s 0x%lx #GP exception\n",
-		       exit_info_1 ? "WRMSR" : "RDMSR", get_regs()->rcx);
+		       exit_info_1 ? "WRMSR" : "RDMSR", vcpu0.regs.rcx);
 	}
 
 	/* Jump over RDMSR/WRMSR instruction */
-	vmcb->save.rip += 2;
+	vcpu0.vmcb->save.rip += 2;
 
 	/*
 	 * Test whether the intercept was for RDMSR/WRMSR.
@@ -414,9 +414,9 @@ static bool msr_intercept_finished(struct svm_test *test)
 	 */
 	if (exit_info_1)
 		test->scratch =
-			((get_regs()->rdx << 32) | (vmcb->save.rax & 0xffffffff));
+			((vcpu0.regs.rdx << 32) | (vcpu0.vmcb->save.rax & 0xffffffff));
 	else
-		test->scratch = get_regs()->rcx;
+		test->scratch = vcpu0.regs.rcx;
 
 	return false;
 }
@@ -429,7 +429,7 @@ static bool check_msr_intercept(struct svm_test *test)
 
 static void prepare_mode_switch(struct svm_test *test)
 {
-	vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR)
+	vcpu0.vmcb->control.intercept_exceptions |= (1ULL << GP_VECTOR)
 		|  (1ULL << UD_VECTOR)
 		|  (1ULL << DF_VECTOR)
 		|  (1ULL << PF_VECTOR);
@@ -495,16 +495,16 @@ static bool mode_switch_finished(struct svm_test *test)
 {
 	u64 cr0, cr4, efer;
 
-	cr0  = vmcb->save.cr0;
-	cr4  = vmcb->save.cr4;
-	efer = vmcb->save.efer;
+	cr0  = vcpu0.vmcb->save.cr0;
+	cr4  = vcpu0.vmcb->save.cr4;
+	efer = vcpu0.vmcb->save.efer;
 
 	/* Only expect VMMCALL intercepts */
-	if (vmcb->control.exit_code != SVM_EXIT_VMMCALL)
+	if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL)
 		return true;
 
 	/* Jump over VMMCALL instruction */
-	vmcb->save.rip += 3;
+	vcpu0.vmcb->save.rip += 3;
 
 	/* Do sanity checks */
 	switch (test->scratch) {
@@ -539,7 +539,7 @@ static void prepare_ioio(struct svm_test *test)
 {
 	u8 *io_bitmap = svm_get_io_bitmap();
 
-	vmcb->control.intercept |= (1ULL << INTERCEPT_IOIO_PROT);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_IOIO_PROT);
 	test->scratch = 0;
 	memset(io_bitmap, 0, 8192);
 	io_bitmap[8192] = 0xFF;
@@ -623,17 +623,17 @@ static bool ioio_finished(struct svm_test *test)
 	u8 *io_bitmap = svm_get_io_bitmap();
 
 	/* Only expect IOIO intercepts */
-	if (vmcb->control.exit_code == SVM_EXIT_VMMCALL)
+	if (vcpu0.vmcb->control.exit_code == SVM_EXIT_VMMCALL)
 		return true;
 
-	if (vmcb->control.exit_code != SVM_EXIT_IOIO)
+	if (vcpu0.vmcb->control.exit_code != SVM_EXIT_IOIO)
 		return true;
 
 	/* one step forward */
 	test->scratch += 1;
 
-	port = vmcb->control.exit_info_1 >> 16;
-	size = (vmcb->control.exit_info_1 >> SVM_IOIO_SIZE_SHIFT) & 7;
+	port = vcpu0.vmcb->control.exit_info_1 >> 16;
+	size = (vcpu0.vmcb->control.exit_info_1 >> SVM_IOIO_SIZE_SHIFT) & 7;
 
 	while (size--) {
 		io_bitmap[port / 8] &= ~(1 << (port & 7));
@@ -653,7 +653,7 @@ static bool check_ioio(struct svm_test *test)
 
 static void prepare_asid_zero(struct svm_test *test)
 {
-	vmcb->control.asid = 0;
+	vcpu0.vmcb->control.asid = 0;
 }
 
 static void test_asid_zero(struct svm_test *test)
@@ -663,12 +663,12 @@ static void test_asid_zero(struct svm_test *test)
 
 static bool check_asid_zero(struct svm_test *test)
 {
-	return vmcb->control.exit_code == SVM_EXIT_ERR;
+	return vcpu0.vmcb->control.exit_code == SVM_EXIT_ERR;
 }
 
 static void sel_cr0_bug_prepare(struct svm_test *test)
 {
-	vmcb->control.intercept |= (1ULL << INTERCEPT_SELECTIVE_CR0);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_SELECTIVE_CR0);
 }
 
 static bool sel_cr0_bug_finished(struct svm_test *test)
@@ -696,7 +696,7 @@ static void sel_cr0_bug_test(struct svm_test *test)
 
 static bool sel_cr0_bug_check(struct svm_test *test)
 {
-	return vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE;
+	return vcpu0.vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE;
 }
 
 #define TSC_ADJUST_VALUE    (1ll << 32)
@@ -711,7 +711,7 @@ static bool tsc_adjust_supported(void)
 static void tsc_adjust_prepare(struct svm_test *test)
 {
 	default_prepare(test);
-	vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
+	vcpu0.vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
 
 	wrmsr(MSR_IA32_TSC_ADJUST, -TSC_ADJUST_VALUE);
 	int64_t adjust = rdmsr(MSR_IA32_TSC_ADJUST);
@@ -766,13 +766,13 @@ static void svm_tsc_scale_run_testcase(u64 duration,
 	guest_tsc_delay_value = (duration << TSC_SHIFT) * tsc_scale;
 
 	test_set_guest(svm_tsc_scale_guest);
-	vmcb->control.tsc_offset = tsc_offset;
+	vcpu0.vmcb->control.tsc_offset = tsc_offset;
 	wrmsr(MSR_AMD64_TSC_RATIO, (u64)(tsc_scale * (1ULL << 32)));
 
 	start_tsc = rdtsc();
 
 	if (svm_vmrun() != SVM_EXIT_VMMCALL)
-		report_fail("unexpected vm exit code 0x%x", vmcb->control.exit_code);
+		report_fail("unexpected vm exit code 0x%x", vcpu0.vmcb->control.exit_code);
 
 	actual_duration = (rdtsc() - start_tsc) >> TSC_SHIFT;
 
@@ -857,7 +857,7 @@ static bool latency_finished(struct svm_test *test)
 
 	vmexit_sum += cycles;
 
-	vmcb->save.rip += 3;
+	vcpu0.vmcb->save.rip += 3;
 
 	runs -= 1;
 
@@ -868,7 +868,7 @@ static bool latency_finished(struct svm_test *test)
 
 static bool latency_finished_clean(struct svm_test *test)
 {
-	vmcb->control.clean = VMCB_CLEAN_ALL;
+	vcpu0.vmcb->control.clean = VMCB_CLEAN_ALL;
 	return latency_finished(test);
 }
 
@@ -892,7 +892,7 @@ static void lat_svm_insn_prepare(struct svm_test *test)
 
 static bool lat_svm_insn_finished(struct svm_test *test)
 {
-	u64 vmcb_phys = virt_to_phys(vmcb);
+	u64 vmcb_phys = virt_to_phys(vcpu0.vmcb);
 	u64 cycles;
 
 	for ( ; runs != 0; runs--) {
@@ -972,8 +972,8 @@ static void pending_event_prepare(struct svm_test *test)
 
 	pending_event_guest_run = false;
 
-	vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
-	vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
+	vcpu0.vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
 
 	apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL |
 		       APIC_DM_FIXED | ipi_vector, 0);
@@ -990,14 +990,14 @@ static bool pending_event_finished(struct svm_test *test)
 {
 	switch (get_test_stage(test)) {
 	case 0:
-		if (vmcb->control.exit_code != SVM_EXIT_INTR) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_INTR) {
 			report_fail("VMEXIT not due to pending interrupt. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
 
-		vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR);
-		vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+		vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR);
+		vcpu0.vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
 
 		if (pending_event_guest_run) {
 			report_fail("Guest ran before host received IPI\n");
@@ -1080,19 +1080,19 @@ static void pending_event_cli_test(struct svm_test *test)
 
 static bool pending_event_cli_finished(struct svm_test *test)
 {
-	if ( vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+	if ( vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 		report_fail("VM_EXIT return to host is not EXIT_VMMCALL exit reason 0x%x",
-			    vmcb->control.exit_code);
+			    vcpu0.vmcb->control.exit_code);
 		return true;
 	}
 
 	switch (get_test_stage(test)) {
 	case 0:
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 
 		pending_event_ipi_fired = false;
 
-		vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
+		vcpu0.vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
 
 		/* Now entering again with VINTR_MASKING=1.  */
 		apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL |
@@ -1225,30 +1225,30 @@ static bool interrupt_finished(struct svm_test *test)
 	switch (get_test_stage(test)) {
 	case 0:
 	case 2:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 
-		vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
-		vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
+		vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
+		vcpu0.vmcb->control.int_ctl |= V_INTR_MASKING_MASK;
 		break;
 
 	case 1:
 	case 3:
-		if (vmcb->control.exit_code != SVM_EXIT_INTR) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_INTR) {
 			report_fail("VMEXIT not due to intr intercept. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
 
 		irq_enable();
 		irq_disable();
 
-		vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR);
-		vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+		vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR);
+		vcpu0.vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
 		break;
 
 	case 4:
@@ -1309,20 +1309,20 @@ static bool nmi_finished(struct svm_test *test)
 {
 	switch (get_test_stage(test)) {
 	case 0:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 
-		vmcb->control.intercept |= (1ULL << INTERCEPT_NMI);
+		vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_NMI);
 		break;
 
 	case 1:
-		if (vmcb->control.exit_code != SVM_EXIT_NMI) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_NMI) {
 			report_fail("VMEXIT not due to NMI intercept. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
 
@@ -1411,20 +1411,20 @@ static bool nmi_hlt_finished(struct svm_test *test)
 {
 	switch (get_test_stage(test)) {
 	case 1:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 
-		vmcb->control.intercept |= (1ULL << INTERCEPT_NMI);
+		vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_NMI);
 		break;
 
 	case 2:
-		if (vmcb->control.exit_code != SVM_EXIT_NMI) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_NMI) {
 			report_fail("VMEXIT not due to NMI intercept. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
 
@@ -1470,34 +1470,34 @@ static bool exc_inject_finished(struct svm_test *test)
 {
 	switch (get_test_stage(test)) {
 	case 0:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->save.rip += 3;
-		vmcb->control.event_inj = NMI_VECTOR | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID;
+		vcpu0.vmcb->save.rip += 3;
+		vcpu0.vmcb->control.event_inj = NMI_VECTOR | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID;
 		break;
 
 	case 1:
-		if (vmcb->control.exit_code != SVM_EXIT_ERR) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_ERR) {
 			report_fail("VMEXIT not due to error. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
 		report(count_exc == 0, "exception with vector 2 not injected");
-		vmcb->control.event_inj = DE_VECTOR | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID;
+		vcpu0.vmcb->control.event_inj = DE_VECTOR | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID;
 		break;
 
 	case 2:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 		report(count_exc == 1, "divide overflow exception injected");
-		report(!(vmcb->control.event_inj & SVM_EVTINJ_VALID), "eventinj.VALID cleared");
+		report(!(vcpu0.vmcb->control.event_inj & SVM_EVTINJ_VALID), "eventinj.VALID cleared");
 		break;
 
 	default:
@@ -1525,9 +1525,9 @@ static void virq_inject_prepare(struct svm_test *test)
 {
 	handle_irq(0xf1, virq_isr);
 	default_prepare(test);
-	vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
+	vcpu0.vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
 		(0x0f << V_INTR_PRIO_SHIFT); // Set to the highest priority
-	vmcb->control.int_vector = 0xf1;
+	vcpu0.vmcb->control.int_vector = 0xf1;
 	virq_fired = false;
 	set_test_stage(test, 0);
 }
@@ -1580,66 +1580,66 @@ static void virq_inject_test(struct svm_test *test)
 
 static bool virq_inject_finished(struct svm_test *test)
 {
-	vmcb->save.rip += 3;
+	vcpu0.vmcb->save.rip += 3;
 
 	switch (get_test_stage(test)) {
 	case 0:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		if (vmcb->control.int_ctl & V_IRQ_MASK) {
+		if (vcpu0.vmcb->control.int_ctl & V_IRQ_MASK) {
 			report_fail("V_IRQ not cleared on VMEXIT after firing");
 			return true;
 		}
 		virq_fired = false;
-		vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
-		vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
+		vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+		vcpu0.vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
 			(0x0f << V_INTR_PRIO_SHIFT);
 		break;
 
 	case 1:
-		if (vmcb->control.exit_code != SVM_EXIT_VINTR) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VINTR) {
 			report_fail("VMEXIT not due to vintr. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
 		if (virq_fired) {
 			report_fail("V_IRQ fired before SVM_EXIT_VINTR");
 			return true;
 		}
-		vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+		vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
 		break;
 
 	case 2:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
 		virq_fired = false;
 		// Set irq to lower priority
-		vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
+		vcpu0.vmcb->control.int_ctl = V_INTR_MASKING_MASK | V_IRQ_MASK |
 			(0x08 << V_INTR_PRIO_SHIFT);
 		// Raise guest TPR
-		vmcb->control.int_ctl |= 0x0a & V_TPR_MASK;
+		vcpu0.vmcb->control.int_ctl |= 0x0a & V_TPR_MASK;
 		break;
 
 	case 3:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+		vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
 		break;
 
 	case 4:
 		// INTERCEPT_VINTR should be ignored because V_INTR_PRIO < V_TPR
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
 		break;
@@ -1693,8 +1693,8 @@ static void reg_corruption_prepare(struct svm_test *test)
 	default_prepare(test);
 	set_test_stage(test, 0);
 
-	vmcb->control.int_ctl = V_INTR_MASKING_MASK;
-	vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
+	vcpu0.vmcb->control.int_ctl = V_INTR_MASKING_MASK;
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
 
 	handle_irq(TIMER_VECTOR, reg_corruption_isr);
 
@@ -1730,9 +1730,9 @@ static bool reg_corruption_finished(struct svm_test *test)
 		goto cleanup;
 	}
 
-	if (vmcb->control.exit_code == SVM_EXIT_INTR) {
+	if (vcpu0.vmcb->control.exit_code == SVM_EXIT_INTR) {
 
-		void* guest_rip = (void*)vmcb->save.rip;
+		void* guest_rip = (void*)vcpu0.vmcb->save.rip;
 
 		irq_enable();
 		irq_disable();
@@ -1802,7 +1802,7 @@ static volatile bool init_intercept;
 static void init_intercept_prepare(struct svm_test *test)
 {
 	init_intercept = false;
-	vmcb->control.intercept |= (1ULL << INTERCEPT_INIT);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_INIT);
 }
 
 static void init_intercept_test(struct svm_test *test)
@@ -1812,11 +1812,11 @@ static void init_intercept_test(struct svm_test *test)
 
 static bool init_intercept_finished(struct svm_test *test)
 {
-	vmcb->save.rip += 3;
+	vcpu0.vmcb->save.rip += 3;
 
-	if (vmcb->control.exit_code != SVM_EXIT_INIT) {
+	if (vcpu0.vmcb->control.exit_code != SVM_EXIT_INIT) {
 		report_fail("VMEXIT not due to init intercept. Exit reason 0x%x",
-			    vmcb->control.exit_code);
+			    vcpu0.vmcb->control.exit_code);
 
 		return true;
 	}
@@ -1916,12 +1916,12 @@ static bool host_rflags_finished(struct svm_test *test)
 {
 	switch (get_test_stage(test)) {
 	case 0:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 			report_fail("Unexpected VMEXIT. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 		/*
 		 * Setting host EFLAGS.TF not immediately before VMRUN, causes
 		 * #DB trap before first guest instruction is executed
@@ -1929,14 +1929,14 @@ static bool host_rflags_finished(struct svm_test *test)
 		host_rflags_set_tf = true;
 		break;
 	case 1:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
 		    host_rflags_guest_main_flag != 1) {
 			report_fail("Unexpected VMEXIT or #DB handler"
 				    " invoked before guest main. Exit reason 0x%x",
-				    vmcb->control.exit_code);
+				    vcpu0.vmcb->control.exit_code);
 			return true;
 		}
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 		/*
 		 * Setting host EFLAGS.TF immediately before VMRUN, causes #DB
 		 * trap after VMRUN completes on the host side (i.e., after
@@ -1945,21 +1945,21 @@ static bool host_rflags_finished(struct svm_test *test)
 		host_rflags_ss_on_vmrun = true;
 		break;
 	case 2:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
 		    rip_detected != (u64)&vmrun_rip + 3) {
 			report_fail("Unexpected VMEXIT or RIP mismatch."
 				    " Exit reason 0x%x, RIP actual: %lx, RIP expected: "
-				    "%lx", vmcb->control.exit_code,
+				    "%lx", vcpu0.vmcb->control.exit_code,
 				    (u64)&vmrun_rip + 3, rip_detected);
 			return true;
 		}
 		host_rflags_set_rf = true;
 		host_rflags_guest_main_flag = 0;
 		host_rflags_vmrun_reached = false;
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 		break;
 	case 3:
-		if (vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
+		if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL ||
 		    rip_detected != (u64)&vmrun_rip ||
 		    host_rflags_guest_main_flag != 1 ||
 		    host_rflags_db_handler_flag > 1 ||
@@ -1967,13 +1967,13 @@ static bool host_rflags_finished(struct svm_test *test)
 			report_fail("Unexpected VMEXIT or RIP mismatch or "
 				    "EFLAGS.RF not cleared."
 				    " Exit reason 0x%x, RIP actual: %lx, RIP expected: "
-				    "%lx", vmcb->control.exit_code,
+				    "%lx", vcpu0.vmcb->control.exit_code,
 				    (u64)&vmrun_rip, rip_detected);
 			return true;
 		}
 		host_rflags_set_tf = false;
 		host_rflags_set_rf = false;
-		vmcb->save.rip += 3;
+		vcpu0.vmcb->save.rip += 3;
 		break;
 	default:
 		return true;
@@ -2015,7 +2015,7 @@ static void svm_cr4_osxsave_test(void)
 		unsigned long cr4 = read_cr4() | X86_CR4_OSXSAVE;
 
 		write_cr4(cr4);
-		vmcb->save.cr4 = cr4;
+		vcpu0.vmcb->save.cr4 = cr4;
 	}
 
 	report(this_cpu_has(X86_FEATURE_OSXSAVE), "CPUID.01H:ECX.XSAVE set before VMRUN");
@@ -2063,13 +2063,13 @@ static void basic_guest_main(struct svm_test *test)
 		tmp = val | mask;					\
 		switch (cr) {						\
 		case 0:							\
-			vmcb->save.cr0 = tmp;				\
+			vcpu0.vmcb->save.cr0 = tmp;				\
 			break;						\
 		case 3:							\
-			vmcb->save.cr3 = tmp;				\
+			vcpu0.vmcb->save.cr3 = tmp;				\
 			break;						\
 		case 4:							\
-			vmcb->save.cr4 = tmp;				\
+			vcpu0.vmcb->save.cr4 = tmp;				\
 		}							\
 		r = svm_vmrun();					\
 		report(r == exit_code, "Test CR%d %s%d:%d: %lx, wanted exit 0x%x, got 0x%x", \
@@ -2082,39 +2082,39 @@ static void test_efer(void)
 	/*
 	 * Un-setting EFER.SVME is illegal
 	 */
-	u64 efer_saved = vmcb->save.efer;
+	u64 efer_saved = vcpu0.vmcb->save.efer;
 	u64 efer = efer_saved;
 
 	report (svm_vmrun() == SVM_EXIT_VMMCALL, "EFER.SVME: %lx", efer);
 	efer &= ~EFER_SVME;
-	vmcb->save.efer = efer;
+	vcpu0.vmcb->save.efer = efer;
 	report (svm_vmrun() == SVM_EXIT_ERR, "EFER.SVME: %lx", efer);
-	vmcb->save.efer = efer_saved;
+	vcpu0.vmcb->save.efer = efer_saved;
 
 	/*
 	 * EFER MBZ bits: 63:16, 9
 	 */
-	efer_saved = vmcb->save.efer;
+	efer_saved = vcpu0.vmcb->save.efer;
 
-	SVM_TEST_REG_RESERVED_BITS(8, 9, 1, "EFER", vmcb->save.efer,
+	SVM_TEST_REG_RESERVED_BITS(8, 9, 1, "EFER", vcpu0.vmcb->save.efer,
 				   efer_saved, SVM_EFER_RESERVED_MASK);
-	SVM_TEST_REG_RESERVED_BITS(16, 63, 4, "EFER", vmcb->save.efer,
+	SVM_TEST_REG_RESERVED_BITS(16, 63, 4, "EFER", vcpu0.vmcb->save.efer,
 				   efer_saved, SVM_EFER_RESERVED_MASK);
 
 	/*
 	 * EFER.LME and CR0.PG are both set and CR4.PAE is zero.
 	 */
-	u64 cr0_saved = vmcb->save.cr0;
+	u64 cr0_saved = vcpu0.vmcb->save.cr0;
 	u64 cr0;
-	u64 cr4_saved = vmcb->save.cr4;
+	u64 cr4_saved = vcpu0.vmcb->save.cr4;
 	u64 cr4;
 
 	efer = efer_saved | EFER_LME;
-	vmcb->save.efer = efer;
+	vcpu0.vmcb->save.efer = efer;
 	cr0 = cr0_saved | X86_CR0_PG | X86_CR0_PE;
-	vmcb->save.cr0 = cr0;
+	vcpu0.vmcb->save.cr0 = cr0;
 	cr4 = cr4_saved & ~X86_CR4_PAE;
-	vmcb->save.cr4 = cr4;
+	vcpu0.vmcb->save.cr4 = cr4;
 	report(svm_vmrun() == SVM_EXIT_ERR, "EFER.LME=1 (%lx), "
 	       "CR0.PG=1 (%lx) and CR4.PAE=0 (%lx)", efer, cr0, cr4);
 
@@ -2125,31 +2125,31 @@ static void test_efer(void)
 	 * SVM_EXIT_ERR.
 	 */
 	cr4 = cr4_saved | X86_CR4_PAE;
-	vmcb->save.cr4 = cr4;
+	vcpu0.vmcb->save.cr4 = cr4;
 	cr0 &= ~X86_CR0_PE;
-	vmcb->save.cr0 = cr0;
+	vcpu0.vmcb->save.cr0 = cr0;
 	report(svm_vmrun() == SVM_EXIT_ERR, "EFER.LME=1 (%lx), "
 	       "CR0.PG=1 and CR0.PE=0 (%lx)", efer, cr0);
 
 	/*
 	 * EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
 	 */
-	u32 cs_attrib_saved = vmcb->save.cs.attrib;
+	u32 cs_attrib_saved = vcpu0.vmcb->save.cs.attrib;
 	u32 cs_attrib;
 
 	cr0 |= X86_CR0_PE;
-	vmcb->save.cr0 = cr0;
+	vcpu0.vmcb->save.cr0 = cr0;
 	cs_attrib = cs_attrib_saved | SVM_SELECTOR_L_MASK |
 		SVM_SELECTOR_DB_MASK;
-	vmcb->save.cs.attrib = cs_attrib;
+	vcpu0.vmcb->save.cs.attrib = cs_attrib;
 	report(svm_vmrun() == SVM_EXIT_ERR, "EFER.LME=1 (%lx), "
 	       "CR0.PG=1 (%lx), CR4.PAE=1 (%lx), CS.L=1 and CS.D=1 (%x)",
 	       efer, cr0, cr4, cs_attrib);
 
-	vmcb->save.cr0 = cr0_saved;
-	vmcb->save.cr4 = cr4_saved;
-	vmcb->save.efer = efer_saved;
-	vmcb->save.cs.attrib = cs_attrib_saved;
+	vcpu0.vmcb->save.cr0 = cr0_saved;
+	vcpu0.vmcb->save.cr4 = cr4_saved;
+	vcpu0.vmcb->save.efer = efer_saved;
+	vcpu0.vmcb->save.cs.attrib = cs_attrib_saved;
 }
 
 static void test_cr0(void)
@@ -2157,37 +2157,37 @@ static void test_cr0(void)
 	/*
 	 * Un-setting CR0.CD and setting CR0.NW is illegal combination
 	 */
-	u64 cr0_saved = vmcb->save.cr0;
+	u64 cr0_saved = vcpu0.vmcb->save.cr0;
 	u64 cr0 = cr0_saved;
 
 	cr0 |= X86_CR0_CD;
 	cr0 &= ~X86_CR0_NW;
-	vmcb->save.cr0 = cr0;
+	vcpu0.vmcb->save.cr0 = cr0;
 	report (svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR0 CD=1,NW=0: %lx",
 		cr0);
 	cr0 |= X86_CR0_NW;
-	vmcb->save.cr0 = cr0;
+	vcpu0.vmcb->save.cr0 = cr0;
 	report (svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR0 CD=1,NW=1: %lx",
 		cr0);
 	cr0 &= ~X86_CR0_NW;
 	cr0 &= ~X86_CR0_CD;
-	vmcb->save.cr0 = cr0;
+	vcpu0.vmcb->save.cr0 = cr0;
 	report (svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR0 CD=0,NW=0: %lx",
 		cr0);
 	cr0 |= X86_CR0_NW;
-	vmcb->save.cr0 = cr0;
+	vcpu0.vmcb->save.cr0 = cr0;
 	report (svm_vmrun() == SVM_EXIT_ERR, "Test CR0 CD=0,NW=1: %lx",
 		cr0);
-	vmcb->save.cr0 = cr0_saved;
+	vcpu0.vmcb->save.cr0 = cr0_saved;
 
 	/*
 	 * CR0[63:32] are not zero
 	 */
 	cr0 = cr0_saved;
 
-	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "CR0", vmcb->save.cr0, cr0_saved,
+	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "CR0", vcpu0.vmcb->save.cr0, cr0_saved,
 				   SVM_CR0_RESERVED_MASK);
-	vmcb->save.cr0 = cr0_saved;
+	vcpu0.vmcb->save.cr0 = cr0_saved;
 }
 
 static void test_cr3(void)
@@ -2196,37 +2196,37 @@ static void test_cr3(void)
 	 * CR3 MBZ bits based on different modes:
 	 *   [63:52] - long mode
 	 */
-	u64 cr3_saved = vmcb->save.cr3;
+	u64 cr3_saved = vcpu0.vmcb->save.cr3;
 
 	SVM_TEST_CR_RESERVED_BITS(0, 63, 1, 3, cr3_saved,
 				  SVM_CR3_LONG_MBZ_MASK, SVM_EXIT_ERR, "");
 
-	vmcb->save.cr3 = cr3_saved & ~SVM_CR3_LONG_MBZ_MASK;
+	vcpu0.vmcb->save.cr3 = cr3_saved & ~SVM_CR3_LONG_MBZ_MASK;
 	report(svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR3 63:0: %lx",
-	       vmcb->save.cr3);
+	       vcpu0.vmcb->save.cr3);
 
 	/*
 	 * CR3 non-MBZ reserved bits based on different modes:
 	 *   [11:5] [2:0] - long mode (PCIDE=0)
 	 *          [2:0] - PAE legacy mode
 	 */
-	u64 cr4_saved = vmcb->save.cr4;
+	u64 cr4_saved = vcpu0.vmcb->save.cr4;
 	u64 *pdpe = npt_get_pml4e();
 
 	/*
 	 * Long mode
 	 */
 	if (this_cpu_has(X86_FEATURE_PCID)) {
-		vmcb->save.cr4 = cr4_saved | X86_CR4_PCIDE;
+		vcpu0.vmcb->save.cr4 = cr4_saved | X86_CR4_PCIDE;
 		SVM_TEST_CR_RESERVED_BITS(0, 11, 1, 3, cr3_saved,
 					  SVM_CR3_LONG_RESERVED_MASK, SVM_EXIT_VMMCALL, "(PCIDE=1) ");
 
-		vmcb->save.cr3 = cr3_saved & ~SVM_CR3_LONG_RESERVED_MASK;
+		vcpu0.vmcb->save.cr3 = cr3_saved & ~SVM_CR3_LONG_RESERVED_MASK;
 		report(svm_vmrun() == SVM_EXIT_VMMCALL, "Test CR3 63:0: %lx",
-		       vmcb->save.cr3);
+		       vcpu0.vmcb->save.cr3);
 	}
 
-	vmcb->save.cr4 = cr4_saved & ~X86_CR4_PCIDE;
+	vcpu0.vmcb->save.cr4 = cr4_saved & ~X86_CR4_PCIDE;
 
 	if (!npt_supported())
 		goto skip_npt_only;
@@ -2238,44 +2238,44 @@ static void test_cr3(void)
 				  SVM_CR3_LONG_RESERVED_MASK, SVM_EXIT_NPF, "(PCIDE=0) ");
 
 	pdpe[0] |= 1ULL;
-	vmcb->save.cr3 = cr3_saved;
+	vcpu0.vmcb->save.cr3 = cr3_saved;
 
 	/*
 	 * PAE legacy
 	 */
 	pdpe[0] &= ~1ULL;
-	vmcb->save.cr4 = cr4_saved | X86_CR4_PAE;
+	vcpu0.vmcb->save.cr4 = cr4_saved | X86_CR4_PAE;
 	SVM_TEST_CR_RESERVED_BITS(0, 2, 1, 3, cr3_saved,
 				  SVM_CR3_PAE_LEGACY_RESERVED_MASK, SVM_EXIT_NPF, "(PAE) ");
 
 	pdpe[0] |= 1ULL;
 
 skip_npt_only:
-	vmcb->save.cr3 = cr3_saved;
-	vmcb->save.cr4 = cr4_saved;
+	vcpu0.vmcb->save.cr3 = cr3_saved;
+	vcpu0.vmcb->save.cr4 = cr4_saved;
 }
 
 /* Test CR4 MBZ bits based on legacy or long modes */
 static void test_cr4(void)
 {
-	u64 cr4_saved = vmcb->save.cr4;
-	u64 efer_saved = vmcb->save.efer;
+	u64 cr4_saved = vcpu0.vmcb->save.cr4;
+	u64 efer_saved = vcpu0.vmcb->save.efer;
 	u64 efer = efer_saved;
 
 	efer &= ~EFER_LME;
-	vmcb->save.efer = efer;
+	vcpu0.vmcb->save.efer = efer;
 	SVM_TEST_CR_RESERVED_BITS(12, 31, 1, 4, cr4_saved,
 				  SVM_CR4_LEGACY_RESERVED_MASK, SVM_EXIT_ERR, "");
 
 	efer |= EFER_LME;
-	vmcb->save.efer = efer;
+	vcpu0.vmcb->save.efer = efer;
 	SVM_TEST_CR_RESERVED_BITS(12, 31, 1, 4, cr4_saved,
 				  SVM_CR4_RESERVED_MASK, SVM_EXIT_ERR, "");
 	SVM_TEST_CR_RESERVED_BITS(32, 63, 4, 4, cr4_saved,
 				  SVM_CR4_RESERVED_MASK, SVM_EXIT_ERR, "");
 
-	vmcb->save.cr4 = cr4_saved;
-	vmcb->save.efer = efer_saved;
+	vcpu0.vmcb->save.cr4 = cr4_saved;
+	vcpu0.vmcb->save.efer = efer_saved;
 }
 
 static void test_dr(void)
@@ -2283,27 +2283,27 @@ static void test_dr(void)
 	/*
 	 * DR6[63:32] and DR7[63:32] are MBZ
 	 */
-	u64 dr_saved = vmcb->save.dr6;
+	u64 dr_saved = vcpu0.vmcb->save.dr6;
 
-	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "DR6", vmcb->save.dr6, dr_saved,
+	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "DR6", vcpu0.vmcb->save.dr6, dr_saved,
 				   SVM_DR6_RESERVED_MASK);
-	vmcb->save.dr6 = dr_saved;
+	vcpu0.vmcb->save.dr6 = dr_saved;
 
-	dr_saved = vmcb->save.dr7;
-	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "DR7", vmcb->save.dr7, dr_saved,
+	dr_saved = vcpu0.vmcb->save.dr7;
+	SVM_TEST_REG_RESERVED_BITS(32, 63, 4, "DR7", vcpu0.vmcb->save.dr7, dr_saved,
 				   SVM_DR7_RESERVED_MASK);
 
-	vmcb->save.dr7 = dr_saved;
+	vcpu0.vmcb->save.dr7 = dr_saved;
 }
 
 /* TODO: verify if high 32-bits are sign- or zero-extended on bare metal */
 #define	TEST_BITMAP_ADDR(save_intercept, type, addr, exit_code,		\
 			 msg) {						\
-		vmcb->control.intercept = saved_intercept | 1ULL << type; \
+		vcpu0.vmcb->control.intercept = saved_intercept | 1ULL << type; \
 		if (type == INTERCEPT_MSR_PROT)				\
-			vmcb->control.msrpm_base_pa = addr;		\
+			vcpu0.vmcb->control.msrpm_base_pa = addr;		\
 		else							\
-			vmcb->control.iopm_base_pa = addr;		\
+			vcpu0.vmcb->control.iopm_base_pa = addr;		\
 		report(svm_vmrun() == exit_code,			\
 		       "Test %s address: %lx", msg, addr);		\
 	}
@@ -2326,7 +2326,7 @@ static void test_dr(void)
  */
 static void test_msrpm_iopm_bitmap_addrs(void)
 {
-	u64 saved_intercept = vmcb->control.intercept;
+	u64 saved_intercept = vcpu0.vmcb->control.intercept;
 	u64 addr_beyond_limit = 1ull << cpuid_maxphyaddr();
 	u64 addr = virt_to_phys(svm_get_msr_bitmap()) & (~((1ull << 12) - 1));
 	u8 *io_bitmap = svm_get_io_bitmap();
@@ -2368,7 +2368,7 @@ static void test_msrpm_iopm_bitmap_addrs(void)
 	TEST_BITMAP_ADDR(saved_intercept, INTERCEPT_IOIO_PROT, addr,
 			 SVM_EXIT_VMMCALL, "IOPM");
 
-	vmcb->control.intercept = saved_intercept;
+	vcpu0.vmcb->control.intercept = saved_intercept;
 }
 
 /*
@@ -2398,22 +2398,22 @@ static void test_canonicalization(void)
 	u64 saved_addr;
 	u64 return_value;
 	u64 addr_limit;
-	u64 vmcb_phys = virt_to_phys(vmcb);
+	u64 vmcb_phys = virt_to_phys(vcpu0.vmcb);
 
 	addr_limit = (this_cpu_has(X86_FEATURE_LA57)) ? 57 : 48;
 	u64 noncanonical_mask = NONCANONICAL & ~((1ul << addr_limit) - 1);
 
-	TEST_CANONICAL_VMLOAD(vmcb->save.fs.base, "FS");
-	TEST_CANONICAL_VMLOAD(vmcb->save.gs.base, "GS");
-	TEST_CANONICAL_VMLOAD(vmcb->save.ldtr.base, "LDTR");
-	TEST_CANONICAL_VMLOAD(vmcb->save.tr.base, "TR");
-	TEST_CANONICAL_VMLOAD(vmcb->save.kernel_gs_base, "KERNEL GS");
-	TEST_CANONICAL_VMRUN(vmcb->save.es.base, "ES");
-	TEST_CANONICAL_VMRUN(vmcb->save.cs.base, "CS");
-	TEST_CANONICAL_VMRUN(vmcb->save.ss.base, "SS");
-	TEST_CANONICAL_VMRUN(vmcb->save.ds.base, "DS");
-	TEST_CANONICAL_VMRUN(vmcb->save.gdtr.base, "GDTR");
-	TEST_CANONICAL_VMRUN(vmcb->save.idtr.base, "IDTR");
+	TEST_CANONICAL_VMLOAD(vcpu0.vmcb->save.fs.base, "FS");
+	TEST_CANONICAL_VMLOAD(vcpu0.vmcb->save.gs.base, "GS");
+	TEST_CANONICAL_VMLOAD(vcpu0.vmcb->save.ldtr.base, "LDTR");
+	TEST_CANONICAL_VMLOAD(vcpu0.vmcb->save.tr.base, "TR");
+	TEST_CANONICAL_VMLOAD(vcpu0.vmcb->save.kernel_gs_base, "KERNEL GS");
+	TEST_CANONICAL_VMRUN(vcpu0.vmcb->save.es.base, "ES");
+	TEST_CANONICAL_VMRUN(vcpu0.vmcb->save.cs.base, "CS");
+	TEST_CANONICAL_VMRUN(vcpu0.vmcb->save.ss.base, "SS");
+	TEST_CANONICAL_VMRUN(vcpu0.vmcb->save.ds.base, "DS");
+	TEST_CANONICAL_VMRUN(vcpu0.vmcb->save.gdtr.base, "GDTR");
+	TEST_CANONICAL_VMRUN(vcpu0.vmcb->save.idtr.base, "IDTR");
 }
 
 /*
@@ -2467,7 +2467,7 @@ static void svm_test_singlestep(void)
 	/*
 	 * Trap expected after completion of first guest instruction
 	 */
-	vmcb->save.rflags |= X86_EFLAGS_TF;
+	vcpu0.vmcb->save.rflags |= X86_EFLAGS_TF;
 	report (__svm_vmrun((u64)guest_rflags_test_guest) == SVM_EXIT_VMMCALL &&
 		guest_rflags_test_trap_rip == (u64)&insn2,
 		"Test EFLAGS.TF on VMRUN: trap expected  after completion of first guest instruction");
@@ -2475,17 +2475,17 @@ static void svm_test_singlestep(void)
 	 * No trap expected
 	 */
 	guest_rflags_test_trap_rip = 0;
-	vmcb->save.rip += 3;
-	vmcb->save.rflags |= X86_EFLAGS_TF;
-	report (__svm_vmrun(vmcb->save.rip) == SVM_EXIT_VMMCALL &&
+	vcpu0.vmcb->save.rip += 3;
+	vcpu0.vmcb->save.rflags |= X86_EFLAGS_TF;
+	report (__svm_vmrun(vcpu0.vmcb->save.rip) == SVM_EXIT_VMMCALL &&
 		guest_rflags_test_trap_rip == 0, "Test EFLAGS.TF on VMRUN: trap not expected");
 
 	/*
 	 * Let guest finish execution
 	 */
-	vmcb->save.rip += 3;
-	report (__svm_vmrun(vmcb->save.rip) == SVM_EXIT_VMMCALL &&
-		vmcb->save.rip == (u64)&guest_end, "Test EFLAGS.TF on VMRUN: guest execution completion");
+	vcpu0.vmcb->save.rip += 3;
+	report (__svm_vmrun(vcpu0.vmcb->save.rip) == SVM_EXIT_VMMCALL &&
+		vcpu0.vmcb->save.rip == (u64)&guest_end, "Test EFLAGS.TF on VMRUN: guest execution completion");
 }
 
 static bool volatile svm_errata_reproduced = false;
@@ -2556,7 +2556,7 @@ static void svm_vmrun_errata_test(void)
 
 static void vmload_vmsave_guest_main(struct svm_test *test)
 {
-	u64 vmcb_phys = virt_to_phys(vmcb);
+	u64 vmcb_phys = virt_to_phys(vcpu0.vmcb);
 
 	asm volatile ("vmload %0" : : "a"(vmcb_phys));
 	asm volatile ("vmsave %0" : : "a"(vmcb_phys));
@@ -2564,7 +2564,7 @@ static void vmload_vmsave_guest_main(struct svm_test *test)
 
 static void svm_vmload_vmsave(void)
 {
-	u32 intercept_saved = vmcb->control.intercept;
+	u32 intercept_saved = vcpu0.vmcb->control.intercept;
 
 	test_set_guest(vmload_vmsave_guest_main);
 
@@ -2572,49 +2572,49 @@ static void svm_vmload_vmsave(void)
 	 * Disabling intercept for VMLOAD and VMSAVE doesn't cause
 	 * respective #VMEXIT to host
 	 */
-	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
-	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
+	vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
+	vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
 	svm_vmrun();
-	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
+	report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
 	       "VMLOAD/VMSAVE intercept: Expected VMMCALL #VMEXIT");
 
 	/*
 	 * Enabling intercept for VMLOAD and VMSAVE causes respective
 	 * #VMEXIT to host
 	 */
-	vmcb->control.intercept |= (1ULL << INTERCEPT_VMLOAD);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_VMLOAD);
 	svm_vmrun();
-	report(vmcb->control.exit_code == SVM_EXIT_VMLOAD, "Test "
+	report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMLOAD, "Test "
 	       "VMLOAD/VMSAVE intercept: Expected VMLOAD #VMEXIT");
-	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
-	vmcb->control.intercept |= (1ULL << INTERCEPT_VMSAVE);
+	vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_VMSAVE);
 	svm_vmrun();
-	report(vmcb->control.exit_code == SVM_EXIT_VMSAVE, "Test "
+	report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMSAVE, "Test "
 	       "VMLOAD/VMSAVE intercept: Expected VMSAVE #VMEXIT");
-	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
+	vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
 	svm_vmrun();
-	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
+	report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
 	       "VMLOAD/VMSAVE intercept: Expected VMMCALL #VMEXIT");
 
-	vmcb->control.intercept |= (1ULL << INTERCEPT_VMLOAD);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_VMLOAD);
 	svm_vmrun();
-	report(vmcb->control.exit_code == SVM_EXIT_VMLOAD, "Test "
+	report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMLOAD, "Test "
 	       "VMLOAD/VMSAVE intercept: Expected VMLOAD #VMEXIT");
-	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
+	vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMLOAD);
 	svm_vmrun();
-	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
+	report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
 	       "VMLOAD/VMSAVE intercept: Expected VMMCALL #VMEXIT");
 
-	vmcb->control.intercept |= (1ULL << INTERCEPT_VMSAVE);
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_VMSAVE);
 	svm_vmrun();
-	report(vmcb->control.exit_code == SVM_EXIT_VMSAVE, "Test "
+	report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMSAVE, "Test "
 	       "VMLOAD/VMSAVE intercept: Expected VMSAVE #VMEXIT");
-	vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
+	vcpu0.vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMSAVE);
 	svm_vmrun();
-	report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
+	report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMMCALL, "Test "
 	       "VMLOAD/VMSAVE intercept: Expected VMMCALL #VMEXIT");
 
-	vmcb->control.intercept = intercept_saved;
+	vcpu0.vmcb->control.intercept = intercept_saved;
 }
 
 static void prepare_vgif_enabled(struct svm_test *test)
@@ -2632,42 +2632,42 @@ static bool vgif_finished(struct svm_test *test)
 	switch (get_test_stage(test))
 		{
 		case 0:
-			if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+			if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 				report_fail("VMEXIT not due to vmmcall.");
 				return true;
 			}
-			vmcb->control.int_ctl |= V_GIF_ENABLED_MASK;
-			vmcb->save.rip += 3;
+			vcpu0.vmcb->control.int_ctl |= V_GIF_ENABLED_MASK;
+			vcpu0.vmcb->save.rip += 3;
 			inc_test_stage(test);
 			break;
 		case 1:
-			if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+			if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 				report_fail("VMEXIT not due to vmmcall.");
 				return true;
 			}
-			if (!(vmcb->control.int_ctl & V_GIF_MASK)) {
+			if (!(vcpu0.vmcb->control.int_ctl & V_GIF_MASK)) {
 				report_fail("Failed to set VGIF when executing STGI.");
-				vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
+				vcpu0.vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
 				return true;
 			}
 			report_pass("STGI set VGIF bit.");
-			vmcb->save.rip += 3;
+			vcpu0.vmcb->save.rip += 3;
 			inc_test_stage(test);
 			break;
 		case 2:
-			if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+			if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 				report_fail("VMEXIT not due to vmmcall.");
 				return true;
 			}
-			if (vmcb->control.int_ctl & V_GIF_MASK) {
+			if (vcpu0.vmcb->control.int_ctl & V_GIF_MASK) {
 				report_fail("Failed to clear VGIF when executing CLGI.");
-				vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
+				vcpu0.vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
 				return true;
 			}
 			report_pass("CLGI cleared VGIF bit.");
-			vmcb->save.rip += 3;
+			vcpu0.vmcb->save.rip += 3;
 			inc_test_stage(test);
-			vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
+			vcpu0.vmcb->control.int_ctl &= ~V_GIF_ENABLED_MASK;
 			break;
 		default:
 			return true;
@@ -2710,14 +2710,14 @@ static void pause_filter_run_test(int pause_iterations, int filter_value, int wa
 	pause_test_counter = pause_iterations;
 	wait_counter = wait_iterations;
 
-	vmcb->control.pause_filter_count = filter_value;
-	vmcb->control.pause_filter_thresh = threshold;
+	vcpu0.vmcb->control.pause_filter_count = filter_value;
+	vcpu0.vmcb->control.pause_filter_thresh = threshold;
 	svm_vmrun();
 
 	if (filter_value <= pause_iterations || wait_iterations < threshold)
-		report(vmcb->control.exit_code == SVM_EXIT_PAUSE, "expected PAUSE vmexit");
+		report(vcpu0.vmcb->control.exit_code == SVM_EXIT_PAUSE, "expected PAUSE vmexit");
 	else
-		report(vmcb->control.exit_code == SVM_EXIT_VMMCALL, "no expected PAUSE vmexit");
+		report(vcpu0.vmcb->control.exit_code == SVM_EXIT_VMMCALL, "no expected PAUSE vmexit");
 }
 
 static void pause_filter_test(void)
@@ -2727,7 +2727,7 @@ static void pause_filter_test(void)
 		return;
 	}
 
-	vmcb->control.intercept |= (1 << INTERCEPT_PAUSE);
+	vcpu0.vmcb->control.intercept |= (1 << INTERCEPT_PAUSE);
 
 	// filter count more that pause count - no VMexit
 	pause_filter_run_test(10, 9, 0, 0);
@@ -2850,15 +2850,15 @@ static void svm_nm_test(void)
 	write_cr0(read_cr0() & ~X86_CR0_TS);
 	test_set_guest(svm_nm_test_guest);
 
-	vmcb->save.cr0 = vmcb->save.cr0 | X86_CR0_TS;
+	vcpu0.vmcb->save.cr0 = vcpu0.vmcb->save.cr0 | X86_CR0_TS;
 	report(svm_vmrun() == SVM_EXIT_VMMCALL && nm_test_counter == 1,
 	       "fnop with CR0.TS set in L2, #NM is triggered");
 
-	vmcb->save.cr0 = (vmcb->save.cr0 & ~X86_CR0_TS) | X86_CR0_EM;
+	vcpu0.vmcb->save.cr0 = (vcpu0.vmcb->save.cr0 & ~X86_CR0_TS) | X86_CR0_EM;
 	report(svm_vmrun() == SVM_EXIT_VMMCALL && nm_test_counter == 2,
 	       "fnop with CR0.EM set in L2, #NM is triggered");
 
-	vmcb->save.cr0 = vmcb->save.cr0 & ~(X86_CR0_TS | X86_CR0_EM);
+	vcpu0.vmcb->save.cr0 = vcpu0.vmcb->save.cr0 & ~(X86_CR0_TS | X86_CR0_EM);
 	report(svm_vmrun() == SVM_EXIT_VMMCALL && nm_test_counter == 2,
 	       "fnop with CR0.TS and CR0.EM unset no #NM excpetion");
 }
@@ -2989,21 +2989,20 @@ static void svm_lbrv_test0(void)
 
 static void svm_lbrv_test1(void)
 {
-	struct svm_extra_regs* regs = get_regs();
 
 	report(true, "Test that without LBRV enabled, guest LBR state does 'leak' to the host(1)");
 
-	vmcb->save.rip = (ulong)svm_lbrv_test_guest1;
-	vmcb->control.virt_ext = 0;
+	vcpu0.vmcb->save.rip = (ulong)svm_lbrv_test_guest1;
+	vcpu0.vmcb->control.virt_ext = 0;
 
 	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
 	DO_BRANCH(host_branch1);
-	SVM_VMRUN(vmcb,regs);
+	SVM_VMRUN(vcpu0.vmcb, &vcpu0.regs);
 	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 
-	if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+	if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 		report(false, "VMEXIT not due to vmmcall. Exit reason 0x%x",
-		       vmcb->control.exit_code);
+		       vcpu0.vmcb->control.exit_code);
 		return;
 	}
 
@@ -3013,23 +3012,21 @@ static void svm_lbrv_test1(void)
 
 static void svm_lbrv_test2(void)
 {
-	struct svm_extra_regs* regs = get_regs();
-
 	report(true, "Test that without LBRV enabled, guest LBR state does 'leak' to the host(2)");
 
-	vmcb->save.rip = (ulong)svm_lbrv_test_guest2;
-	vmcb->control.virt_ext = 0;
+	vcpu0.vmcb->save.rip = (ulong)svm_lbrv_test_guest2;
+	vcpu0.vmcb->control.virt_ext = 0;
 
 	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
 	DO_BRANCH(host_branch2);
 	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
-	SVM_VMRUN(vmcb,regs);
+	SVM_VMRUN(vcpu0.vmcb, &vcpu0.regs);
 	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
 
-	if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+	if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 		report(false, "VMEXIT not due to vmmcall. Exit reason 0x%x",
-		       vmcb->control.exit_code);
+		       vcpu0.vmcb->control.exit_code);
 		return;
 	}
 
@@ -3039,32 +3036,30 @@ static void svm_lbrv_test2(void)
 
 static void svm_lbrv_nested_test1(void)
 {
-	struct svm_extra_regs* regs = get_regs();
-
 	if (!lbrv_supported()) {
 		report_skip("LBRV not supported in the guest");
 		return;
 	}
 
 	report(true, "Test that with LBRV enabled, guest LBR state doesn't leak (1)");
-	vmcb->save.rip = (ulong)svm_lbrv_test_guest1;
-	vmcb->control.virt_ext = LBR_CTL_ENABLE_MASK;
-	vmcb->save.dbgctl = DEBUGCTLMSR_LBR;
+	vcpu0.vmcb->save.rip = (ulong)svm_lbrv_test_guest1;
+	vcpu0.vmcb->control.virt_ext = LBR_CTL_ENABLE_MASK;
+	vcpu0.vmcb->save.dbgctl = DEBUGCTLMSR_LBR;
 
 	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
 	DO_BRANCH(host_branch3);
-	SVM_VMRUN(vmcb,regs);
+	SVM_VMRUN(vcpu0.vmcb, &vcpu0.regs);
 	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
 
-	if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+	if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 		report(false, "VMEXIT not due to vmmcall. Exit reason 0x%x",
-		       vmcb->control.exit_code);
+		       vcpu0.vmcb->control.exit_code);
 		return;
 	}
 
-	if (vmcb->save.dbgctl != 0) {
-		report(false, "unexpected virtual guest MSR_IA32_DEBUGCTLMSR value 0x%lx", vmcb->save.dbgctl);
+	if (vcpu0.vmcb->save.dbgctl != 0) {
+		report(false, "unexpected virtual guest MSR_IA32_DEBUGCTLMSR value 0x%lx", vcpu0.vmcb->save.dbgctl);
 		return;
 	}
 
@@ -3074,30 +3069,28 @@ static void svm_lbrv_nested_test1(void)
 
 static void svm_lbrv_nested_test2(void)
 {
-	struct svm_extra_regs* regs = get_regs();
-
 	if (!lbrv_supported()) {
 		report_skip("LBRV not supported in the guest");
 		return;
 	}
 
 	report(true, "Test that with LBRV enabled, guest LBR state doesn't leak (2)");
-	vmcb->save.rip = (ulong)svm_lbrv_test_guest2;
-	vmcb->control.virt_ext = LBR_CTL_ENABLE_MASK;
+	vcpu0.vmcb->save.rip = (ulong)svm_lbrv_test_guest2;
+	vcpu0.vmcb->control.virt_ext = LBR_CTL_ENABLE_MASK;
 
-	vmcb->save.dbgctl = 0;
-	vmcb->save.br_from = (u64)&host_branch2_from;
-	vmcb->save.br_to = (u64)&host_branch2_to;
+	vcpu0.vmcb->save.dbgctl = 0;
+	vcpu0.vmcb->save.br_from = (u64)&host_branch2_from;
+	vcpu0.vmcb->save.br_to = (u64)&host_branch2_to;
 
 	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
 	DO_BRANCH(host_branch4);
-	SVM_VMRUN(vmcb,regs);
+	SVM_VMRUN(vcpu0.vmcb, &vcpu0.regs);
 	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
 
-	if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
+	if (vcpu0.vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
 		report(false, "VMEXIT not due to vmmcall. Exit reason 0x%x",
-		       vmcb->control.exit_code);
+		       vcpu0.vmcb->control.exit_code);
 		return;
 	}
 
@@ -3142,8 +3135,8 @@ static void svm_intr_intercept_mix_run_guest(volatile int *counter, int expected
 	if (counter)
 		report(*counter == 1, "Interrupt is expected");
 
-	report (vmcb->control.exit_code == expected_vmexit, "Test expected VM exit");
-	report(vmcb->save.rflags & X86_EFLAGS_IF, "Guest should have EFLAGS.IF set now");
+	report (vcpu0.vmcb->control.exit_code == expected_vmexit, "Test expected VM exit");
+	report(vcpu0.vmcb->save.rflags & X86_EFLAGS_IF, "Guest should have EFLAGS.IF set now");
 	cli();
 }
 
@@ -3162,9 +3155,9 @@ static void svm_intr_intercept_mix_if(void)
 	// make a physical interrupt to be pending
 	handle_irq(0x55, dummy_isr);
 
-	vmcb->control.intercept |= (1 << INTERCEPT_INTR);
-	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-	vmcb->save.rflags &= ~X86_EFLAGS_IF;
+	vcpu0.vmcb->control.intercept |= (1 << INTERCEPT_INTR);
+	vcpu0.vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+	vcpu0.vmcb->save.rflags &= ~X86_EFLAGS_IF;
 
 	test_set_guest(svm_intr_intercept_mix_if_guest);
 	irq_disable();
@@ -3195,9 +3188,9 @@ static void svm_intr_intercept_mix_gif(void)
 {
 	handle_irq(0x55, dummy_isr);
 
-	vmcb->control.intercept |= (1 << INTERCEPT_INTR);
-	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-	vmcb->save.rflags &= ~X86_EFLAGS_IF;
+	vcpu0.vmcb->control.intercept |= (1 << INTERCEPT_INTR);
+	vcpu0.vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+	vcpu0.vmcb->save.rflags &= ~X86_EFLAGS_IF;
 
 	test_set_guest(svm_intr_intercept_mix_gif_guest);
 	irq_disable();
@@ -3225,9 +3218,9 @@ static void svm_intr_intercept_mix_gif2(void)
 {
 	handle_irq(0x55, dummy_isr);
 
-	vmcb->control.intercept |= (1 << INTERCEPT_INTR);
-	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-	vmcb->save.rflags |= X86_EFLAGS_IF;
+	vcpu0.vmcb->control.intercept |= (1 << INTERCEPT_INTR);
+	vcpu0.vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+	vcpu0.vmcb->save.rflags |= X86_EFLAGS_IF;
 
 	test_set_guest(svm_intr_intercept_mix_gif_guest2);
 	svm_intr_intercept_mix_run_guest(&dummy_isr_recevied, SVM_EXIT_INTR);
@@ -3254,9 +3247,9 @@ static void svm_intr_intercept_mix_nmi(void)
 {
 	handle_exception(2, dummy_nmi_handler);
 
-	vmcb->control.intercept |= (1 << INTERCEPT_NMI);
-	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-	vmcb->save.rflags |= X86_EFLAGS_IF;
+	vcpu0.vmcb->control.intercept |= (1 << INTERCEPT_NMI);
+	vcpu0.vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+	vcpu0.vmcb->save.rflags |= X86_EFLAGS_IF;
 
 	test_set_guest(svm_intr_intercept_mix_nmi_guest);
 	svm_intr_intercept_mix_run_guest(&nmi_recevied, SVM_EXIT_NMI);
@@ -3278,8 +3271,8 @@ static void svm_intr_intercept_mix_smi_guest(struct svm_test *test)
 
 static void svm_intr_intercept_mix_smi(void)
 {
-	vmcb->control.intercept |= (1 << INTERCEPT_SMI);
-	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+	vcpu0.vmcb->control.intercept |= (1 << INTERCEPT_SMI);
+	vcpu0.vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
 	test_set_guest(svm_intr_intercept_mix_smi_guest);
 	svm_intr_intercept_mix_run_guest(NULL, SVM_EXIT_SMI);
 }
@@ -3316,21 +3309,21 @@ static void svm_shutdown_intercept_test(void)
 	 * (KVM usually doesn't intercept #PF)
 	 * */
 	test_set_guest(shutdown_intercept_test_guest);
-	vmcb->save.idtr.base = (u64)unmapped_address;
-	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
+	vcpu0.vmcb->save.idtr.base = (u64)unmapped_address;
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
 	svm_vmrun();
-	report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (BP->PF->DF->TRIPLE_FAULT) test passed");
+	report (vcpu0.vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (BP->PF->DF->TRIPLE_FAULT) test passed");
 
 	/*
 	 * This will usually cause emulated SVM_EXIT_SHUTDOWN
 	 * (KVM usually intercepts #UD)
 	 */
 	test_set_guest(shutdown_intercept_test_guest2);
-	vmcb_ident(vmcb);
-	vmcb->save.idtr.limit = 0;
-	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
+	vmcb_ident(vcpu0.vmcb);
+	vcpu0.vmcb->save.idtr.limit = 0;
+	vcpu0.vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
 	svm_vmrun();
-	report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (UD->DF->TRIPLE_FAULT) test passed");
+	report (vcpu0.vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (UD->DF->TRIPLE_FAULT) test passed");
 }
 
 struct svm_test svm_tests[] = {
-- 
2.26.3


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [kvm-unit-tests PATCH 16/16] add IPI loss stress test
  2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
                   ` (14 preceding siblings ...)
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 15/16] svm: introduce svm_vcpu Maxim Levitsky
@ 2022-10-20 15:24 ` Maxim Levitsky
  2022-10-20 20:23   ` Sean Christopherson
  15 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:24 UTC (permalink / raw)
  To: kvm; +Cc: Maxim Levitsky, Cathy Avery, Paolo Bonzini

Add a test that sends IPIs between vCPUs and detects missing IPIs.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 x86/Makefile.common |   3 +-
 x86/ipi_stress.c    | 235 ++++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg   |   5 +
 3 files changed, 242 insertions(+), 1 deletion(-)
 create mode 100644 x86/ipi_stress.c

diff --git a/x86/Makefile.common b/x86/Makefile.common
index ed5e5c76..8e0b6661 100644
--- a/x86/Makefile.common
+++ b/x86/Makefile.common
@@ -86,7 +86,8 @@ tests-common = $(TEST_DIR)/vmexit.$(exe) $(TEST_DIR)/tsc.$(exe) \
                $(TEST_DIR)/eventinj.$(exe) \
                $(TEST_DIR)/smap.$(exe) \
                $(TEST_DIR)/umip.$(exe) \
-               $(TEST_DIR)/smm_int_window.$(exe)
+               $(TEST_DIR)/smm_int_window.$(exe) \
+               $(TEST_DIR)/ipi_stress.$(exe)
 
 # The following test cases are disabled when building EFI tests because they
 # use absolute addresses in their inline assembly code, which cannot compile
diff --git a/x86/ipi_stress.c b/x86/ipi_stress.c
new file mode 100644
index 00000000..185f791e
--- /dev/null
+++ b/x86/ipi_stress.c
@@ -0,0 +1,235 @@
+#include "libcflat.h"
+#include "smp.h"
+#include "alloc.h"
+#include "apic.h"
+#include "processor.h"
+#include "isr.h"
+#include "asm/barrier.h"
+#include "delay.h"
+#include "svm.h"
+#include "desc.h"
+#include "msr.h"
+#include "vm.h"
+#include "types.h"
+#include "alloc_page.h"
+#include "vmalloc.h"
+#include "svm_lib.h"
+
+u64 num_iterations = -1;
+
+volatile u64 *isr_counts;
+bool use_svm;
+int hlt_allowed = -1;
+
+
+static int get_random(int min, int max)
+{
+	/* TODO: use rdrand to seed a PRNG instead */
+	u64 random_value = rdtsc() >> 4;
+
+	return min + random_value % (max - min + 1);
+}
+
+static void ipi_interrupt_handler(isr_regs_t *r)
+{
+	isr_counts[smp_id()]++;
+	eoi();
+}
+
+static void wait_for_ipi(volatile u64 *count)
+{
+	u64 old_count = *count;
+	bool use_halt;
+
+	switch (hlt_allowed) {
+	case -1:
+		use_halt = get_random(0,10000) == 0;
+		break;
+	case 0:
+		use_halt = false;
+		break;
+	case 1:
+		use_halt = true;
+		break;
+	default:
+		use_halt = false;
+		break;
+	}
+
+	do {
+		if (use_halt)
+			asm volatile ("sti;hlt;cli\n");
+		else
+			asm volatile ("sti;nop;cli");
+
+	} while (old_count == *count);
+}
+
+/******************************************************************************************************/
+
+#ifdef __x86_64__
+
+static void l2_guest_wait_for_ipi(volatile u64 *count)
+{
+	wait_for_ipi(count);
+	asm volatile("vmmcall");
+}
+
+static void l2_guest_dummy(void)
+{
+	asm volatile("vmmcall");
+}
+
+static void wait_for_ipi_in_l2(volatile u64 *count, struct svm_vcpu *vcpu)
+{
+	u64 old_count = *count;
+	bool irq_on_vmentry = get_random(0,1) == 0;
+
+	vcpu->vmcb->save.rip = (ulong)l2_guest_wait_for_ipi;
+	vcpu->regs.rdi = (u64)count;
+
+	vcpu->vmcb->save.rip = irq_on_vmentry ? (ulong)l2_guest_dummy : (ulong)l2_guest_wait_for_ipi;
+
+	do {
+		if (irq_on_vmentry)
+			vcpu->vmcb->save.rflags |= X86_EFLAGS_IF;
+		else
+			vcpu->vmcb->save.rflags &= ~X86_EFLAGS_IF;
+
+		asm volatile("clgi;nop;sti");
+		// GIF is set by VMRUN
+		SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
+		// GIF is cleared by VMEXIT
+		asm volatile("cli;nop;stgi");
+
+		assert(vcpu->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+	} while (old_count == *count);
+}
+#endif
+
+/******************************************************************************************************/
+
+#define FIRST_TEST_VCPU 1
+
+static void vcpu_init(void *data)
+{
+	/* To make it easier to see iteration number in the trace */
+	handle_irq(0x40, ipi_interrupt_handler);
+	handle_irq(0x50, ipi_interrupt_handler);
+}
+
+static void vcpu_code(void *data)
+{
+	int ncpus = cpu_count();
+	int cpu = (long)data;
+#ifdef __x86_64__
+	struct svm_vcpu vcpu;
+#endif
+
+	u64 i;
+
+#ifdef __x86_64__
+	if (cpu == 2 && use_svm)
+		svm_vcpu_init(&vcpu);
+#endif
+
+	assert(cpu != 0);
+
+	if (cpu != FIRST_TEST_VCPU)
+		wait_for_ipi(&isr_counts[cpu]);
+
+	for (i = 0; i < num_iterations; i++)
+	{
+		u8 physical_dst = cpu == ncpus -1 ? 1 : cpu + 1;
+
+		// send IPI to a next vCPU in a circular fashion
+		apic_icr_write(APIC_INT_ASSERT |
+				APIC_DEST_PHYSICAL |
+				APIC_DM_FIXED |
+				(i % 2 ? 0x40 : 0x50),
+				physical_dst);
+
+		if (i == (num_iterations - 1) && cpu != FIRST_TEST_VCPU)
+			break;
+
+#ifdef __x86_64__
+		// wait for the IPI interrupt chain to come back to us
+		if (cpu == 2 && use_svm) {
+				wait_for_ipi_in_l2(&isr_counts[cpu], &vcpu);
+				continue;
+		}
+#endif
+		wait_for_ipi(&isr_counts[cpu]);
+	}
+}
+
+int main(int argc, void** argv)
+{
+	int cpu, ncpus = cpu_count();
+
+	assert(ncpus > 2);
+
+	if (argc > 1)
+		hlt_allowed = atol(argv[1]);
+
+	if (argc > 2)
+		num_iterations = atol(argv[2]);
+
+	setup_vm();
+
+#ifdef __x86_64__
+	if (svm_supported()) {
+		use_svm = true;
+		setup_svm();
+	}
+#endif
+
+	isr_counts = (volatile u64 *)calloc(ncpus, sizeof(u64));
+
+	printf("found %d cpus\n", ncpus);
+	printf("running for %lld iterations - test\n",
+		(long long unsigned int)num_iterations);
+
+
+	for (cpu = 0; cpu < ncpus; ++cpu)
+		on_cpu_async(cpu, vcpu_init, (void *)(long)cpu);
+
+	/* now let all the vCPUs finish the init function */
+	while (cpus_active() > 1)
+		  pause();
+
+	printf("starting test on all cpus but 0...\n");
+
+	for (cpu = ncpus-1; cpu >= FIRST_TEST_VCPU; cpu--)
+		on_cpu_async(cpu, vcpu_code, (void *)(long)cpu);
+
+	printf("test started, waiting to end...\n");
+
+	while (cpus_active() > 1) {
+
+		unsigned long isr_count1, isr_count2;
+
+		isr_count1 = isr_counts[1];
+		delay(5ULL*1000*1000*1000);
+		isr_count2 = isr_counts[1];
+
+		if (isr_count1 == isr_count2) {
+			printf("\n");
+			printf("hang detected!!\n");
+			break;
+		} else {
+			printf("made %ld IPIs \n", (isr_count2 - isr_count1)*(ncpus-1));
+		}
+	}
+
+	printf("\n");
+
+	for (cpu = 1; cpu < ncpus; ++cpu)
+		report(isr_counts[cpu] == num_iterations,
+				"Number of IPIs match (%lld)",
+				(long long unsigned int)isr_counts[cpu]);
+
+	free((void*)isr_counts);
+	return report_summary();
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index ebb3fdfc..7655d2ba 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -61,6 +61,11 @@ smp = 2
 file = smptest.flat
 smp = 3
 
+[ipi_stress]
+file = ipi_stress.flat
+extra_params = -cpu host,-x2apic,-svm,-hypervisor -global kvm-pit.lost_tick_policy=discard -machine kernel-irqchip=on -append '0 50000'
+smp = 4
+
 [vmexit_cpuid]
 file = vmexit.flat
 extra_params = -append 'cpuid'
-- 
2.26.3
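
For reference, the two '-append' arguments map to hlt_allowed and
num_iterations in main(), so a standalone run (assuming the usual x86/run
wrapper and the same QEMU options as in the unittests.cfg entry above) would
look roughly like:

  ./x86/run x86/ipi_stress.flat -smp 4 \
      -cpu host,-x2apic,-svm,-hypervisor \
      -global kvm-pit.lost_tick_policy=discard \
      -machine kernel-irqchip=on \
      -append '0 50000'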


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test.
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test Maxim Levitsky
@ 2022-10-20 15:26   ` Maxim Levitsky
  2022-10-20 19:06     ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-20 15:26 UTC (permalink / raw)
  To: kvm; +Cc: Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 18:23 +0300, Maxim Levitsky wrote:
> Test that if L2 triggers a shutdown, this VM exits to L1
> and doesn't crash the host.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  x86/svm_tests.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
> 
> diff --git a/x86/svm_tests.c b/x86/svm_tests.c
> index 19b35e95..2c29c2b0 100644
> --- a/x86/svm_tests.c
> +++ b/x86/svm_tests.c
> @@ -10,6 +10,7 @@
>  #include "isr.h"
>  #include "apic.h"
>  #include "delay.h"
> +#include "vmalloc.h"
>  
>  #define SVM_EXIT_MAX_DR_INTERCEPT 0x3f
>  
> @@ -3270,6 +3271,55 @@ static void svm_intr_intercept_mix_smi(void)
>  	svm_intr_intercept_mix_run_guest(NULL, SVM_EXIT_SMI);
>  }
>  
> +
> +static void shutdown_intercept_test_guest(struct svm_test *test)
> +{
> +	asm volatile ("int3");
> +	report_fail("should not reach here\n");
> +
> +}
> +
> +static void shutdown_intercept_test_guest2(struct svm_test *test)
> +{
> +	asm volatile ("ud2");
> +	report_fail("should not reach here\n");
> +
> +}
> +
> +static void svm_shutdown_intercept_test(void)
> +{
> +	void* unmapped_address = alloc_vpage();
> +
> +	/*
> +	 * Test that shutdown vm exit doesn't crash L0
> +	 *
> +	 * Test both native and emulated triple fault
> +	 * (due to exception merging)
> +	 */
> +
> +
> +	/*
> +	 * This will usually cause native SVM_EXIT_SHUTDOWN
> +	 * (KVM usually doesn't intercept #PF)
> +	 * */
> +	test_set_guest(shutdown_intercept_test_guest);
> +	vmcb->save.idtr.base = (u64)unmapped_address;
> +	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
> +	svm_vmrun();
> +	report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (BP->PF->DF->TRIPLE_FAULT) test passed");
> +
> +	/*
> +	 * This will usually cause emulated SVM_EXIT_SHUTDOWN
> +	 * (KVM usually intercepts #UD)
> +	 */
> +	test_set_guest(shutdown_intercept_test_guest2);
> +	vmcb_ident(vmcb);
> +	vmcb->save.idtr.limit = 0;
> +	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
> +	svm_vmrun();
> +	report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (UD->DF->TRIPLE_FAULT) test passed");
> +}
> +
>  struct svm_test svm_tests[] = {
>  	{ "null", default_supported, default_prepare,
>  	  default_prepare_gif_clear, null_test,
> @@ -3382,6 +3432,7 @@ struct svm_test svm_tests[] = {
>  	TEST(svm_intr_intercept_mix_smi),
>  	TEST(svm_tsc_scale_test),
>  	TEST(pause_filter_test),
> +	TEST(svm_shutdown_intercept_test),
>  	{ NULL, NULL, NULL, NULL, NULL, NULL, NULL }
>  };
>  


Note that on unpatched KVM, this test will cause a kernel panic on the host if run.

I sent a patch today with a fix for this.

Best regards,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow Maxim Levitsky
@ 2022-10-20 18:01   ` Sean Christopherson
  2022-10-24 12:36     ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 18:01 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> Tests that need interrupt shadow can't rely on irq_enable function anyway,
> as its comment states,  and it is useful to know for sure that interrupts
> are enabled after the call to this function.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  lib/x86/processor.h       | 9 ++++-----
>  x86/apic.c                | 1 -
>  x86/ioapic.c              | 1 -
>  x86/svm_tests.c           | 9 ---------
>  x86/tscdeadline_latency.c | 1 -
>  x86/vmx_tests.c           | 7 -------
>  6 files changed, 4 insertions(+), 24 deletions(-)
> 
> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> index 03242206..9db07346 100644
> --- a/lib/x86/processor.h
> +++ b/lib/x86/processor.h
> @@ -720,13 +720,12 @@ static inline void irq_disable(void)
>  	asm volatile("cli");
>  }
>  
> -/* Note that irq_enable() does not ensure an interrupt shadow due
> - * to the vagaries of compiler optimizations.  If you need the
> - * shadow, use a single asm with "sti" and the instruction after it.
> - */
>  static inline void irq_enable(void)
>  {
> -	asm volatile("sti");
> +	asm volatile(
> +			"sti \n\t"

Formatting is odd.  Doesn't really matter, but I think this can simply be:

static inline void sti_nop(void)
{
	asm volatile("sti; nop");
}


> +			"nop\n\t"

I like the idea of a helper to enable IRQs and consume pending interrupts, but I
think we should add a new helper instead of changing irq_enable().

Hmm, or alternatively, kill off irq_enable() and irq_disable() entirely and instead
add sti_nop().  I like this idea even better.  The helpers are all x86-specific,
so there's no need to add a layer of abstraction, and sti() + sti_nop() has the
benefit of making it very clear what code is being emitted without having to come
up with clever function names.

And I think we should go even further and provide a helper to do the entire sequence
of enable->nop->disable, which is a very common pattern.  No idea what to call
this one, though I suppose sti_nop_cli() would work.

My vote is to replace all irq_enable() and irq_disable() usage with sti() and cli(),
and then introduce sti_nop() and sti_nop_cli() (or whatever it gets called) and
convert users as appropriate.
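
As a rough sketch (not what this patch implements, and naming aside), the full
set of helpers could look something like:

static inline void cli(void)
{
	asm volatile("cli");
}

static inline void sti(void)
{
	asm volatile("sti");
}

/*
 * STI blocks interrupts until the instruction after it completes, so the
 * NOP guarantees any pending interrupt is delivered before the caller
 * continues.
 */
static inline void sti_nop(void)
{
	asm volatile("sti; nop");
}

/* Open the interrupt window just long enough to service pending IRQs. */
static inline void sti_nop_cli(void)
{
	asm volatile("sti; nop; cli");
}

Callers that currently do irq_enable() immediately followed by irq_disable()
would then collapse to a single sti_nop_cli() call.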

> +	);
>  }
>  
>  static inline void invlpg(volatile void *va)
> diff --git a/x86/apic.c b/x86/apic.c
> index 23508ad5..a8964d88 100644
> --- a/x86/apic.c
> +++ b/x86/apic.c
> @@ -36,7 +36,6 @@ static void __test_tsc_deadline_timer(void)
>      irq_enable();
>  
>      wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC));
> -    asm volatile ("nop");

I'm not entirely sure the existing nop is necessary here, but it's a functional
change since it hoists the nop above the WRMSR.  To be safe, probably best to
leave this as-is for now.

>      report(tdt_count == 1, "tsc deadline timer");
>      report(rdmsr(MSR_IA32_TSCDEADLINE) == 0, "tsc deadline timer clearing");
>  }

...

> diff --git a/x86/tscdeadline_latency.c b/x86/tscdeadline_latency.c
> index a3bc4ea4..c54530dd 100644
> --- a/x86/tscdeadline_latency.c
> +++ b/x86/tscdeadline_latency.c
> @@ -73,7 +73,6 @@ static void start_tsc_deadline_timer(void)
>      irq_enable();
>  
>      wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC)+delta);
> -    asm volatile ("nop");

Another functional change that should be skipped, at least for now.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 11/16] svm: add svm_suported
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 11/16] svm: add svm_suported Maxim Levitsky
@ 2022-10-20 18:21   ` Sean Christopherson
  2022-10-24 12:40     ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 18:21 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

s/suported/supported

On Thu, Oct 20, 2022, Maxim Levitsky wrote:

Please provide a changelog.

> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  lib/x86/svm_lib.h | 5 +++++
>  x86/svm.c         | 2 +-
>  2 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
> index 04910281..2d13b066 100644
> --- a/lib/x86/svm_lib.h
> +++ b/lib/x86/svm_lib.h
> @@ -4,6 +4,11 @@
>  #include <x86/svm.h>
>  #include "processor.h"
>  
> +static inline bool svm_supported(void)
> +{
> +	return this_cpu_has(X86_FEATURE_SVM);

Why add a wrapper?  The only reason NPT and a few others have wrappers is to
play nice with svm_test's "bool (*supported)(void)" hook.

I would rather go the opposite direction and get rid of the wrappers, which IMO
only make it harder to understand what is being checked.

E.g. add a required_feature to the tests and use that for all X86_FEATURE_*
checks instead of adding wrappers.  And unless there's a supported helper I'm not
seeing, the .supported hook can go away entirely by adding a dedicated "smp_required"
flag.

We'd probably want helper macros for SMP vs. non-SMP, e.g.

#define SVM_V1_TEST(name, feature, ...)
	{ #name, feature, false, ... }
#define SVM_SMP_V1_TEST(name, feature, ...)
	{ #name, feature, true, ... }

diff --git a/x86/svm.c b/x86/svm.c
index 7aa3ebd2..2a412c27 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -170,6 +170,7 @@ test_wanted(const char *name, char *filters[], int filter_count)
 
 int run_svm_tests(int ac, char **av, struct svm_test *svm_tests)
 {
+       bool smp_supported = cpu_count() > 1;
        int i = 0;
 
        ac--;
@@ -187,7 +188,10 @@ int run_svm_tests(int ac, char **av, struct svm_test *svm_tests)
        for (; svm_tests[i].name != NULL; i++) {
                if (!test_wanted(svm_tests[i].name, av, ac))
                        continue;
-               if (svm_tests[i].supported && !svm_tests[i].supported())
+               if (svm_tests[i].required_feature &&
+                   !this_cpu_has(svm_tests[i].required_feature))
+                       continue;
+               if (svm_tests[i].smp_required && !smp_supported)
                        continue;
                if (svm_tests[i].v2 == NULL) {
                        if (svm_tests[i].on_vcpu) {
diff --git a/x86/svm.h b/x86/svm.h
index 0c40a086..632287ca 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -9,7 +9,8 @@
 
 struct svm_test {
        const char *name;
-       bool (*supported)(void);
+       u64 required_feature;
+       bool smp_required;
        void (*prepare)(struct svm_test *test);
        void (*prepare_gif_clear)(struct svm_test *test);
        void (*guest_func)(struct svm_test *test);

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 13/16] svm: move vmcb_ident to svm_lib.c
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 13/16] svm: move vmcb_ident " Maxim Levitsky
@ 2022-10-20 18:37   ` Sean Christopherson
  2022-10-24 12:46     ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 18:37 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:

Changelog please.  
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  lib/x86/svm_lib.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
>  lib/x86/svm_lib.h |  4 ++++

What about calling these simply svm.{c,h} and renaming x86/svm.{c,h} to something
like svm_common.{c,h}?  Long term, it would be wonderful to rid of x86/svm.{c,h}
by genericizing the test framework, e.g. there's a ton of duplicate code between
SVM and VMX.

>  x86/svm.c         | 54 -----------------------------------------------
>  x86/svm.h         |  1 -
>  4 files changed, 58 insertions(+), 55 deletions(-)
> 
> diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
> index 9e82e363..2b067c65 100644
> --- a/lib/x86/svm_lib.c
> +++ b/lib/x86/svm_lib.c
> @@ -103,3 +103,57 @@ void setup_svm(void)
>  
>  	setup_npt();
>  }
> +
> +void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
> +			 u64 base, u32 limit, u32 attr)

Funky indentation and wrap.

void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, u64 base, u32 limit,
		  u32 attr)

> +{
> +	seg->selector = selector;
> +	seg->attrib = attr;
> +	seg->limit = limit;
> +	seg->base = base;
> +}
> +
> +void vmcb_ident(struct vmcb *vmcb)
> +{
> +	u64 vmcb_phys = virt_to_phys(vmcb);
> +	struct vmcb_save_area *save = &vmcb->save;
> +	struct vmcb_control_area *ctrl = &vmcb->control;
> +	u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK

Ugh, a #define for '3' and '9' (in lib/x86/desc.h?) would be nice, but that can
be left for another day/patch.

> +		| SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;

Pre-existing mess, but can you move the '|' to the previous line?  And align the
code?

> +	u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
> +		| SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;

| on the previous line.

	u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK |
			    SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
	u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK |
			    SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 14/16] svm: rewerite vm entry macros
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 14/16] svm: rewerite vm entry macros Maxim Levitsky
@ 2022-10-20 18:55   ` Sean Christopherson
  2022-10-24 12:45     ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 18:55 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:

Changelog please.  This patch in particular is extremely difficult to review
without some explanation of what is being done, and why.

If it's not too much trouble, splitting this over multiple patches would be nice.

> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  lib/x86/svm_lib.h | 58 +++++++++++++++++++++++++++++++++++++++
>  x86/svm.c         | 51 ++++++++++------------------------
>  x86/svm.h         | 70 ++---------------------------------------------
>  x86/svm_tests.c   | 24 ++++++++++------
>  4 files changed, 91 insertions(+), 112 deletions(-)
> 
> diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
> index 27c3b137..59db26de 100644
> --- a/lib/x86/svm_lib.h
> +++ b/lib/x86/svm_lib.h
> @@ -71,4 +71,62 @@ u8* svm_get_io_bitmap(void);
>  #define MSR_BITMAP_SIZE 8192
>  
>  
> +struct svm_extra_regs

Why not just svm_gprs?  This could even include RAX by grabbing it from the VMCB
after VMRUN.

> +{
> +    u64 rbx;
> +    u64 rcx;
> +    u64 rdx;
> +    u64 rbp;
> +    u64 rsi;
> +    u64 rdi;
> +    u64 r8;
> +    u64 r9;
> +    u64 r10;
> +    u64 r11;
> +    u64 r12;
> +    u64 r13;
> +    u64 r14;
> +    u64 r15;

Tab instead of spaces.

> +};
> +
> +#define SWAP_GPRS(reg) \
> +		"xchg %%rcx, 0x08(%%" reg ")\n\t"       \

No need for 2-tab indentation.

> +		"xchg %%rdx, 0x10(%%" reg ")\n\t"       \
> +		"xchg %%rbp, 0x18(%%" reg ")\n\t"       \
> +		"xchg %%rsi, 0x20(%%" reg ")\n\t"       \
> +		"xchg %%rdi, 0x28(%%" reg ")\n\t"       \
> +		"xchg %%r8,  0x30(%%" reg ")\n\t"       \
> +		"xchg %%r9,  0x38(%%" reg ")\n\t"       \
> +		"xchg %%r10, 0x40(%%" reg ")\n\t"       \
> +		"xchg %%r11, 0x48(%%" reg ")\n\t"       \
> +		"xchg %%r12, 0x50(%%" reg ")\n\t"       \
> +		"xchg %%r13, 0x58(%%" reg ")\n\t"       \
> +		"xchg %%r14, 0x60(%%" reg ")\n\t"       \
> +		"xchg %%r15, 0x68(%%" reg ")\n\t"       \
> +		\

Extra line.

> +		"xchg %%rbx, 0x00(%%" reg ")\n\t"       \

Why is RBX last here, but first in the struct?  Ah, because the initial swap uses
RBX as the scratch register.  Why use RAX for the post-VMRUN swap?  AFAICT, that's
completely arbitrary.

> +
> +

> +#define __SVM_VMRUN(vmcb, regs, label)          \
> +		asm volatile (                          \

Unnecessarily deep indentation.

> +			"vmload %%rax\n\t"                  \
> +			"push %%rax\n\t"                    \
> +			"push %%rbx\n\t"                    \
> +			SWAP_GPRS("rbx")                    \
> +			".global " label "\n\t"             \
> +			label ": vmrun %%rax\n\t"           \
> +			"vmsave %%rax\n\t"                  \
> +			"pop %%rax\n\t"                     \
> +			SWAP_GPRS("rax")                    \
> +			"pop %%rax\n\t"                     \
> +			:                                   \
> +			: "a" (virt_to_phys(vmcb)),         \
> +			  "b"(regs)                         \
> +			/* clobbers*/                       \
> +			: "memory"                          \
> +		);

If we're going to rewrite this, why not turn it into a proper assembly routine?
E.g. the whole test_run() noinline thing just so that vmrun_rip isn't redefined
is gross.

> diff --git a/x86/svm.c b/x86/svm.c
> index 37b4cd38..9484a6d1 100644
> --- a/x86/svm.c
> +++ b/x86/svm.c
> @@ -76,11 +76,11 @@ static void test_thunk(struct svm_test *test)
>  	vmmcall();
>  }
>  
> -struct regs regs;
> +struct svm_extra_regs regs;
>  
> -struct regs get_regs(void)
> +struct svm_extra_regs* get_regs(void)
>  {
> -	return regs;
> +	return &regs;

This isn't strictly necessary, is it?  I.e. avoiding the copy can be done in a
separate patch, no?

> @@ -2996,7 +2998,7 @@ static void svm_lbrv_test1(void)
>  
>  	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
>  	DO_BRANCH(host_branch1);
> -	SVM_BARE_VMRUN;
> +	SVM_VMRUN(vmcb,regs);

Space after the comma.  Multiple cases below too.

>  	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
>  
>  	if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
> @@ -3011,6 +3013,8 @@ static void svm_lbrv_test1(void)
>  
>  static void svm_lbrv_test2(void)
>  {
> +	struct svm_extra_regs* regs = get_regs();
> +
>  	report(true, "Test that without LBRV enabled, guest LBR state does 'leak' to the host(2)");
>  
>  	vmcb->save.rip = (ulong)svm_lbrv_test_guest2;
> @@ -3019,7 +3023,7 @@ static void svm_lbrv_test2(void)
>  	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
>  	DO_BRANCH(host_branch2);
>  	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
> -	SVM_BARE_VMRUN;
> +	SVM_VMRUN(vmcb,regs);
>  	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
>  	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
>  
> @@ -3035,6 +3039,8 @@ static void svm_lbrv_test2(void)
>  
>  static void svm_lbrv_nested_test1(void)
>  {
> +	struct svm_extra_regs* regs = get_regs();
> +
>  	if (!lbrv_supported()) {
>  		report_skip("LBRV not supported in the guest");
>  		return;
> @@ -3047,7 +3053,7 @@ static void svm_lbrv_nested_test1(void)
>  
>  	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
>  	DO_BRANCH(host_branch3);
> -	SVM_BARE_VMRUN;
> +	SVM_VMRUN(vmcb,regs);
>  	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
>  	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
>  
> @@ -3068,6 +3074,8 @@ static void svm_lbrv_nested_test1(void)
>  
>  static void svm_lbrv_nested_test2(void)
>  {
> +	struct svm_extra_regs* regs = get_regs();
> +
>  	if (!lbrv_supported()) {
>  		report_skip("LBRV not supported in the guest");
>  		return;
> @@ -3083,7 +3091,7 @@ static void svm_lbrv_nested_test2(void)
>  
>  	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
>  	DO_BRANCH(host_branch4);
> -	SVM_BARE_VMRUN;
> +	SVM_VMRUN(vmcb,regs);
>  	dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
>  	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
>  
> -- 
> 2.26.3
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 15/16] svm: introduce svm_vcpu
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 15/16] svm: introduce svm_vcpu Maxim Levitsky
@ 2022-10-20 19:02   ` Sean Christopherson
  2022-10-24 12:46     ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 19:02 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> This adds minimum amout of code to support tests that
> run SVM on more that one vCPU.

s/that/than

> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  lib/x86/svm_lib.c |   9 +
>  lib/x86/svm_lib.h |  10 +
>  x86/svm.c         |  37 ++-
>  x86/svm.h         |   5 +-
>  x86/svm_npt.c     |  44 ++--
>  x86/svm_tests.c   | 615 +++++++++++++++++++++++-----------------------
>  6 files changed, 362 insertions(+), 358 deletions(-)
> 
> diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
> index 2b067c65..1152c497 100644
> --- a/lib/x86/svm_lib.c
> +++ b/lib/x86/svm_lib.c
> @@ -157,3 +157,12 @@ void vmcb_ident(struct vmcb *vmcb)
>  		ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
>  	}
>  }
> +
> +void svm_vcpu_init(struct svm_vcpu *vcpu)
> +{
> +	vcpu->vmcb = alloc_page();
> +	vmcb_ident(vcpu->vmcb);
> +	memset(&vcpu->regs, 0, sizeof(vcpu->regs));
> +	vcpu->stack = alloc_pages(4) + (PAGE_SIZE << 4);
> +	vcpu->vmcb->save.rsp = (ulong)(vcpu->stack);
> +}
> diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
> index 59db26de..c6957dba 100644
> --- a/lib/x86/svm_lib.h
> +++ b/lib/x86/svm_lib.h
> @@ -89,6 +89,16 @@ struct svm_extra_regs
>      u64 r15;
>  };
>  
> +
> +struct svm_vcpu
> +{
> +	struct vmcb *vmcb;
> +	struct svm_extra_regs regs;
> +	void *stack;
> +};
> +
> +void svm_vcpu_init(struct svm_vcpu *vcpu);
> +
>  #define SWAP_GPRS(reg) \
>  		"xchg %%rcx, 0x08(%%" reg ")\n\t"       \
>  		"xchg %%rdx, 0x10(%%" reg ")\n\t"       \
> diff --git a/x86/svm.c b/x86/svm.c
> index 9484a6d1..7aa3ebd2 100644
> --- a/x86/svm.c
> +++ b/x86/svm.c
> @@ -16,7 +16,7 @@
>  #include "apic.h"
>  #include "svm_lib.h"
>  
> -struct vmcb *vmcb;
> +struct svm_vcpu vcpu0;

It's not strictly vCPU0, e.g. svm_init_intercept_test() deliberately runs on
vCPU2, presumably to avoid running on the BSP?

Since this is churning a lot of code anyways, why not clean this all up and have
run_svm_tests() dynamically allocate state instead of relying on global data?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test.
  2022-10-20 15:26   ` Maxim Levitsky
@ 2022-10-20 19:06     ` Sean Christopherson
  2022-10-24 12:39       ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 19:06 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> On Thu, 2022-10-20 at 18:23 +0300, Maxim Levitsky wrote:
> > +static void svm_shutdown_intercept_test(void)
> > +{
> > +	void* unmapped_address = alloc_vpage();
> > +
> > +	/*
> > +	 * Test that shutdown vm exit doesn't crash L0
> > +	 *
> > +	 * Test both native and emulated triple fault
> > +	 * (due to exception merging)
> > +	 */
> > +
> > +
> > +	/*
> > +	 * This will usually cause native SVM_EXIT_SHUTDOWN
> > +	 * (KVM usually doesn't intercept #PF)
> > +	 * */
> > +	test_set_guest(shutdown_intercept_test_guest);
> > +	vmcb->save.idtr.base = (u64)unmapped_address;
> > +	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
> > +	svm_vmrun();
> > +	report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (BP->PF->DF->TRIPLE_FAULT) test passed");
> > +
> > +	/*
> > +	 * This will usually cause emulated SVM_EXIT_SHUTDOWN
> > +	 * (KVM usually intercepts #UD)
> > +	 */
> > +	test_set_guest(shutdown_intercept_test_guest2);
> > +	vmcb_ident(vmcb);
> > +	vmcb->save.idtr.limit = 0;
> > +	vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
> > +	svm_vmrun();
> > +	report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (UD->DF->TRIPLE_FAULT) test passed");
> > +}
> > +
> >  struct svm_test svm_tests[] = {
> >  	{ "null", default_supported, default_prepare,
> >  	  default_prepare_gif_clear, null_test,
> > @@ -3382,6 +3432,7 @@ struct svm_test svm_tests[] = {
> >  	TEST(svm_intr_intercept_mix_smi),
> >  	TEST(svm_tsc_scale_test),
> >  	TEST(pause_filter_test),
> > +	TEST(svm_shutdown_intercept_test),
> >  	{ NULL, NULL, NULL, NULL, NULL, NULL, NULL }
> >  };
> 
> Note that on unpatched KVM, this test will cause a kernel panic on the host
> if run.
> 
> I sent a patch today with a fix for this.

I'm confused.  The KVM patches address a bug where KVM screws up if the SHUTDOWN
(or INIT) is _not_ intercepted by L1, but the test here does intercept SHUTDOWN.
Are there more bugs lurking in KVM, or am I missing something?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 09/16] svm: move svm spec definitions to lib/x86/svm.h
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 09/16] svm: move svm spec definitions to lib/x86/svm.h Maxim Levitsky
@ 2022-10-20 19:08   ` Sean Christopherson
  0 siblings, 0 replies; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 19:08 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> This is first step of separating SVM code to a library
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  lib/x86/svm.h | 364 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  x86/svm.h     | 359 +------------------------------------------------
>  2 files changed, 365 insertions(+), 358 deletions(-)
>  create mode 100644 lib/x86/svm.h
> 

...

> +enum {
> +		VMCB_CLEAN_INTERCEPTS = 1, /* Intercept vectors, TSC offset, pause filter count */

Indentation is too deep.

> +		VMCB_CLEAN_PERM_MAP = 2,   /* IOPM Base and MSRPM Base */
> +		VMCB_CLEAN_ASID = 4,	   /* ASID */
> +		VMCB_CLEAN_INTR = 8,	   /* int_ctl, int_vector */
> +		VMCB_CLEAN_NPT = 16,	   /* npt_en, nCR3, gPAT */
> +		VMCB_CLEAN_CR = 32,		/* CR0, CR3, CR4, EFER */
> +		VMCB_CLEAN_DR = 64,		/* DR6, DR7 */
> +		VMCB_CLEAN_DT = 128,	   /* GDT, IDT */
> +		VMCB_CLEAN_SEG = 256,	  /* CS, DS, SS, ES, CPL */
> +		VMCB_CLEAN_CR2 = 512,	  /* CR2 only */
> +		VMCB_CLEAN_LBR = 1024,	 /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
> +		VMCB_CLEAN_AVIC = 2048,	/* APIC_BAR, APIC_BACKING_PAGE,
> +					  PHYSICAL_TABLE pointer, LOGICAL_TABLE pointer */
> +		VMCB_CLEAN_ALL = 4095,
> +};

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer Maxim Levitsky
@ 2022-10-20 19:14   ` Sean Christopherson
  2022-10-24 12:37     ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 19:14 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> Add a few functions to apic.c to make it easier to enable and disable
> the local apic timer.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  lib/x86/apic.c | 37 +++++++++++++++++++++++++++++++++++++
>  lib/x86/apic.h |  6 ++++++
>  2 files changed, 43 insertions(+)
> 
> diff --git a/lib/x86/apic.c b/lib/x86/apic.c
> index 5131525a..dc6d3862 100644
> --- a/lib/x86/apic.c
> +++ b/lib/x86/apic.c
> @@ -256,3 +256,40 @@ void init_apic_map(void)
>  			id_map[j++] = i;
>  	}
>  }
> +
> +void apic_setup_timer(int vector, bool periodic)
> +{
> +	/* APIC runs with 'CPU core clock' divided by value in APIC_TDCR */
> +
> +	u32 lvtt = vector |
> +			(periodic ? APIC_LVT_TIMER_PERIODIC : APIC_LVT_TIMER_ONESHOT);

Rather than take @periodic, take the mode.  That way this funky ternary operator
goes away and the callers are self-documenting, e.g. this

	apic_setup_timer(TIMER_VECTOR, APIC_LVT_TIMER_PERIODIC);

is more obvious than

	apic_setup_timer(TIMER_VECTOR, true);
	
> +
> +	apic_cleanup_timer();
> +	apic_write(APIC_TDCR, APIC_TDR_DIV_1);
> +	apic_write(APIC_LVTT, lvtt);
> +}
> +
> +void apic_start_timer(u32 counter)
> +{
> +	apic_write(APIC_TMICT, counter);
> +}
> +
> +void apic_stop_timer(void)
> +{
> +	apic_write(APIC_TMICT, 0);
> +}
> +
> +void apic_cleanup_timer(void)
> +{
> +	u32 lvtt = apic_read(APIC_LVTT);
> +
> +	// stop the counter
> +	apic_stop_timer();
> +
> +	// mask the timer interrupt
> +	apic_write(APIC_LVTT, lvtt | APIC_LVT_MASKED);
> +
> +	// ensure that a pending timer is serviced
> +	irq_enable();

Jumping back to the "nop" patch, I'm reinforcing my vote to add sti_nop().  I
actually started typing a response to say this is broken before remembering that
a nop got added to irq_enable().

> +	irq_disable();
> +}
> diff --git a/lib/x86/apic.h b/lib/x86/apic.h
> index 6d27f047..db691e2a 100644
> --- a/lib/x86/apic.h
> +++ b/lib/x86/apic.h
> @@ -58,6 +58,12 @@ void disable_apic(void);
>  void reset_apic(void);
>  void init_apic_map(void);
>  
> +void apic_cleanup_timer(void);
> +void apic_setup_timer(int vector, bool periodic);
> +
> +void apic_start_timer(u32 counter);
> +void apic_stop_timer(void);
> +
>  /* Converts byte-addressable APIC register offset to 4-byte offset. */
>  static inline u32 apic_reg_index(u32 reg)
>  {
> -- 
> 2.26.3
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 07/16] x86: Add a simple test for SYSENTER instruction.
  2022-10-20 15:23 ` [kvm-unit-tests PATCH 07/16] x86: Add a simple test for SYSENTER instruction Maxim Levitsky
@ 2022-10-20 19:25   ` Sean Christopherson
  2022-10-24 12:38     ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 19:25 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> Run the test with Intel's vendor ID and in the long mode,
> to test the emulation of this instruction on AMD.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  x86/Makefile.x86_64 |   2 +
>  x86/sysenter.c      | 127 ++++++++++++++++++++++++++++++++++++++++++++
>  x86/unittests.cfg   |   5 ++
>  3 files changed, 134 insertions(+)
>  create mode 100644 x86/sysenter.c
> 
> diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
> index 865da07d..8ce53650 100644
> --- a/x86/Makefile.x86_64
> +++ b/x86/Makefile.x86_64
> @@ -33,6 +33,7 @@ tests += $(TEST_DIR)/vmware_backdoors.$(exe)
>  tests += $(TEST_DIR)/rdpru.$(exe)
>  tests += $(TEST_DIR)/pks.$(exe)
>  tests += $(TEST_DIR)/pmu_lbr.$(exe)
> +tests += $(TEST_DIR)/sysenter.$(exe)
>  
>  
>  ifeq ($(CONFIG_EFI),y)
> @@ -60,3 +61,4 @@ $(TEST_DIR)/hyperv_clock.$(bin): $(TEST_DIR)/hyperv_clock.o
>  $(TEST_DIR)/vmx.$(bin): $(TEST_DIR)/vmx_tests.o
>  $(TEST_DIR)/svm.$(bin): $(TEST_DIR)/svm_tests.o
>  $(TEST_DIR)/svm_npt.$(bin): $(TEST_DIR)/svm_npt.o
> +$(TEST_DIR)/sysenter.o: CFLAGS += -Wa,-mintel64
> diff --git a/x86/sysenter.c b/x86/sysenter.c
> new file mode 100644
> index 00000000..6c32fea4
> --- /dev/null
> +++ b/x86/sysenter.c
> @@ -0,0 +1,127 @@
> +#include "alloc.h"
> +#include "libcflat.h"
> +#include "processor.h"
> +#include "msr.h"
> +#include "desc.h"
> +
> +
> +// undefine this to run the syscall instruction in 64 bit mode.
> +// this won't work on AMD due to disabled code in the emulator.
> +#define COMP32

Why not run the test in both 32-bit and 64-bit mode, and skip the 64-bit mode
version if the vCPU model is AMD?
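
E.g. something like the below to gate the 64-bit variant (untested sketch;
is_amd_vcpu() and the 64-bit test routine are made-up names, I didn't check
whether lib/ already has a vendor helper):

static bool is_amd_vcpu(void)
{
	struct cpuid id = cpuid(0);

	/* vendor string "AuthenticAMD" in ebx:edx:ecx */
	return id.b == 0x68747541 && id.d == 0x69746e65 && id.c == 0x444d4163;
}

	...
	if (is_amd_vcpu())
		report_skip("SYSENTER in 64-bit mode (not emulated on AMD)");
	else
		test_sysenter_64();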

> +
> +int main(int ac, char **av)
> +{
> +    extern void sysenter_target(void);
> +    extern void test_done(void);

Tabs instead of spaces.

> +
> +    setup_vm();
> +
> +    int gdt_index = 0x50 >> 3;
> +    ulong rax = 0xDEAD;
> +
> +    /* init the sysenter GDT block */
> +    /*gdt64[gdt_index+0] = gdt64[KERNEL_CS >> 3];
> +    gdt64[gdt_index+1] = gdt64[KERNEL_DS >> 3];
> +    gdt64[gdt_index+2] = gdt64[USER_CS >> 3];
> +    gdt64[gdt_index+3] = gdt64[USER_DS >> 3];*/
> +
> +    /* init the sysenter msrs*/
> +    wrmsr(MSR_IA32_SYSENTER_CS, gdt_index << 3);
> +    wrmsr(MSR_IA32_SYSENTER_ESP, 0xAAFFFFFFFF);
> +    wrmsr(MSR_IA32_SYSENTER_EIP, (uint64_t)sysenter_target);
> +
> +    u8 *thunk = (u8*)malloc(50);
> +    u8 *tmp = thunk;
> +
> +    printf("Thunk at 0x%lx\n", (u64)thunk);
> +
> +    /* movabs test_done, %rdx*/
> +    *tmp++ = 0x48; *tmp++ = 0xBA;
> +    *(u64 *)tmp = (uint64_t)test_done; tmp += 8;
> +    /* jmp %%rdx*/
> +    *tmp++ = 0xFF; *tmp++ = 0xe2;
> +
> +    asm volatile (

Can we add a helper sysenter_asm.S or whatever instead of making this a gigantic
inline asm blob?  And then have separate routines for 32-bit vs. 64-bit?  That'd
require a bit of code duplication, but macros could be used to dedup the common
parts if necessary.

And with a .S file, I believe there's no need to dynamically generate the thunk,
e.g. pass the jump target through a GPR that's not modified/used by SYSENTER.

> +#ifdef COMP32
> +        "# switch to comp32, mode prior to running the test\n"
> +        "ljmpl *1f\n"
> +        "1:\n"
> +        ".long 1f\n"
> +        ".long " xstr(KERNEL_CS32) "\n"
> +        "1:\n"
> +        ".code32\n"
> +#else
> +		"# store the 64 bit thunk address to rdx\n"
> +		"mov %[thunk], %%rdx\n"
> +#endif

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 16/16] add IPI loss stress test
  2022-10-20 15:24 ` [kvm-unit-tests PATCH 16/16] add IPI loss stress test Maxim Levitsky
@ 2022-10-20 20:23   ` Sean Christopherson
  2022-10-24 12:54     ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-20 20:23 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> +u64 num_iterations = -1;

"Run indefinitely" is an odd default.  Why not set the default number of iterations
to something reasonable and then let the user override that if the user wants to
run for an absurdly long time?

> +
> +volatile u64 *isr_counts;
> +bool use_svm;
> +int hlt_allowed = -1;

These can all be static.

> +
> +static int get_random(int min, int max)
> +{
> +	/* TODO : use rdrand to seed an PRNG instead */
> +	u64 random_value = rdtsc() >> 4;
> +
> +	return min + random_value % (max - min + 1);
> +}
> +
> +static void ipi_interrupt_handler(isr_regs_t *r)
> +{
> +	isr_counts[smp_id()]++;
> +	eoi();
> +}
> +
> +static void wait_for_ipi(volatile u64 *count)
> +{
> +	u64 old_count = *count;
> +	bool use_halt;
> +
> +	switch (hlt_allowed) {
> +	case -1:
> +		use_halt = get_random(0,10000) == 0;

Randomly doing "halt" is going to be annoying to debug.  What about tying the
this decision to the iteration and then providing a knob to let the user specify
the frequency?  It seems unlikely that this test will expose a bug that occurs
if and only if the halt path is truly random.
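
E.g. something along these lines (sketch only; "halt_frequency" would be a new
command line parameter):

	/* Use HLT on every Nth iteration instead of at random. */
	use_halt = halt_frequency && (iteration % halt_frequency) == 0;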

> +		break;
> +	case 0:
> +		use_halt = false;
> +		break;
> +	case 1:
> +		use_halt = true;
> +		break;
> +	default:
> +		use_halt = false;
> +		break;
> +	}
> +
> +	do {
> +		if (use_halt)
> +			asm volatile ("sti;hlt;cli\n");

safe_halt();

> +		else
> +			asm volatile ("sti;nop;cli");

sti_nop_cli();

> +
> +	} while (old_count == *count);

There's no need to loop in the use_halt case.  If KVM spuriously wakes the vCPU
from halt, then that's a KVM bug.  Kinda ugly, but it does provide meaningful
coverage for the HLT case.

	if (use_halt) {
		safe_halt();
		cli();
	} else {
		do {
			sti_nop_cli();
		} while (old_count == *count);
	}

	assert(*count == old_count + 1);

> +}
> +
> +/******************************************************************************************************/
> +
> +#ifdef __x86_64__
> +
> +static void l2_guest_wait_for_ipi(volatile u64 *count)
> +{
> +	wait_for_ipi(count);
> +	asm volatile("vmmcall");
> +}
> +
> +static void l2_guest_dummy(void)
> +{
> +	asm volatile("vmmcall");
> +}
> +
> +static void wait_for_ipi_in_l2(volatile u64 *count, struct svm_vcpu *vcpu)
> +{
> +	u64 old_count = *count;
> +	bool irq_on_vmentry = get_random(0,1) == 0;

Same concerns about using random numbers.

> +
> +	vcpu->vmcb->save.rip = (ulong)l2_guest_wait_for_ipi;
> +	vcpu->regs.rdi = (u64)count;
> +
> +	vcpu->vmcb->save.rip = irq_on_vmentry ? (ulong)l2_guest_dummy : (ulong)l2_guest_wait_for_ipi;
> +
> +	do {
> +		if (irq_on_vmentry)
> +			vcpu->vmcb->save.rflags |= X86_EFLAGS_IF;
> +		else
> +			vcpu->vmcb->save.rflags &= ~X86_EFLAGS_IF;
> +
> +		asm volatile("clgi;nop;sti");

Why a NOP between CLGI and STI?  And why re-enable GIF on each iteration?

> +		// GIF is set by VMRUN
> +		SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
> +		// GIF is cleared by VMEXIT
> +		asm volatile("cli;nop;stgi");

Why re-enable GIF on every exit?

> +
> +		assert(vcpu->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
> +
> +	} while (old_count == *count);

Isn't the loop only necessary in the irq_on_vmentry case?

static void run_svm_l2(...)
{
	SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
	assert(vcpu->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
}

E.g. can't this be:

	bool irq_on_vmentry = ???;
	u64 old_count = *count;

	clgi();
	sti();

	vcpu->regs.rdi = (u64)count;

	if (!irq_on_vmentry) {
		vcpu->vmcb->save.rip = (ulong)l2_guest_wait_for_ipi;
		vcpu->vmcb->save.rflags &= ~X86_EFLAGS_IF;
		run_svm_l2(...);
	} else {
		vcpu->vmcb->save.rip = (ulong)l2_guest_dummy
		vcpu->vmcb->save.rflags |= X86_EFLAGS_IF;
		do {
			run_svm_l2(...);
		} while (old_count == *count);
	}

	assert(*count == old_count + 1);
	cli();
	stgi();

> +}
> +#endif
> +
> +/******************************************************************************************************/
> +
> +#define FIRST_TEST_VCPU 1
> +
> +static void vcpu_init(void *data)
> +{
> +	/* To make it easier to see iteration number in the trace */
> +	handle_irq(0x40, ipi_interrupt_handler);
> +	handle_irq(0x50, ipi_interrupt_handler);

Why not make it even more granular?  E.g. do vector == 32 + (iteration % ???)
Regardless, a #define for the (base) vector would be helpful; the usage in
vcpu_code() is a bit magical.
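
E.g. (sketch, IPI_TEST_BASE_VECTOR is a made-up name):

#define IPI_TEST_BASE_VECTOR	0x40

	handle_irq(IPI_TEST_BASE_VECTOR, ipi_interrupt_handler);
	handle_irq(IPI_TEST_BASE_VECTOR + 0x10, ipi_interrupt_handler);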


> +}
> +
> +static void vcpu_code(void *data)
> +{
> +	int ncpus = cpu_count();
> +	int cpu = (long)data;
> +#ifdef __x86_64__
> +	struct svm_vcpu vcpu;
> +#endif
> +
> +	u64 i;
> +
> +#ifdef __x86_64__
> +	if (cpu == 2 && use_svm)

Why only CPU2?

> +		svm_vcpu_init(&vcpu);
> +#endif
> +
> +	assert(cpu != 0);
> +
> +	if (cpu != FIRST_TEST_VCPU)
> +		wait_for_ipi(&isr_counts[cpu]);
> +
> +	for (i = 0; i < num_iterations; i++)
> +	{
> +		u8 physical_dst = cpu == ncpus -1 ? 1 : cpu + 1;

Space after the '-'.

> +
> +		// send IPI to a next vCPU in a circular fashion
> +		apic_icr_write(APIC_INT_ASSERT |
> +				APIC_DEST_PHYSICAL |
> +				APIC_DM_FIXED |
> +				(i % 2 ? 0x40 : 0x50),
> +				physical_dst);
> +
> +		if (i == (num_iterations - 1) && cpu != FIRST_TEST_VCPU)
> +			break;
> +
> +#ifdef __x86_64__
> +		// wait for the IPI interrupt chain to come back to us
> +		if (cpu == 2 && use_svm) {
> +				wait_for_ipi_in_l2(&isr_counts[cpu], &vcpu);

Indentation is funky.

> +				continue;
> +		}
> +#endif
> +		wait_for_ipi(&isr_counts[cpu]);
> +	}
> +}
> +
> +int main(int argc, void** argv)
> +{
> +	int cpu, ncpus = cpu_count();
> +
> +	assert(ncpus > 2);
> +
> +	if (argc > 1)
> +		hlt_allowed = atol(argv[1]);
> +
> +	if (argc > 2)
> +		num_iterations = atol(argv[2]);
> +
> +	setup_vm();
> +
> +#ifdef __x86_64__
> +	if (svm_supported()) {
> +		use_svm = true;
> +		setup_svm();
> +	}
> +#endif
> +
> +	isr_counts = (volatile u64 *)calloc(ncpus, sizeof(u64));
> +
> +	printf("found %d cpus\n", ncpus);
> +	printf("running for %lld iterations - test\n",
> +		(long long unsigned int)num_iterations);
> +
> +
> +	for (cpu = 0; cpu < ncpus; ++cpu)
> +		on_cpu_async(cpu, vcpu_init, (void *)(long)cpu);
> +
> +	/* now let all the vCPUs end the IPI function*/
> +	while (cpus_active() > 1)
> +		  pause();
> +
> +	printf("starting test on all cpus but 0...\n");
> +
> +	for (cpu = ncpus-1; cpu >= FIRST_TEST_VCPU; cpu--)

Spaces around the '-'.

> +		on_cpu_async(cpu, vcpu_code, (void *)(long)cpu);

Why not use smp_id() in vcpu_code()?  ipi_interrupt_handler() already relies on
that being correct.

> +
> +	printf("test started, waiting to end...\n");
> +
> +	while (cpus_active() > 1) {
> +
> +		unsigned long isr_count1, isr_count2;
> +
> +		isr_count1 = isr_counts[1];
> +		delay(5ULL*1000*1000*1000);

Please add a macro or two for nanoseconds/milliseconds/seconds or whatever this
expands to.
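
E.g. (sketch; I'm not sure off-hand whether delay() takes nanoseconds or TSC
cycles here, so pick whichever name actually matches):

#define HANG_CHECK_TIMEOUT	(5ULL * 1000 * 1000 * 1000)
	...
		delay(HANG_CHECK_TIMEOUT);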

> +		isr_count2 = isr_counts[1];
> +
> +		if (isr_count1 == isr_count2) {
> +			printf("\n");
> +			printf("hang detected!!\n");
> +			break;
> +		} else {
> +			printf("made %ld IPIs \n", (isr_count2 - isr_count1)*(ncpus-1));
> +		}
> +	}
> +
> +	printf("\n");
> +
> +	for (cpu = 1; cpu < ncpus; ++cpu)
> +		report(isr_counts[cpu] == num_iterations,
> +				"Number of IPIs match (%lld)",

Indentation.

> +				(long long unsigned int)isr_counts[cpu]);

Print num_iterations, i.e. expected vs. actual?

> +
> +	free((void*)isr_counts);
> +	return report_summary();
> +}
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> index ebb3fdfc..7655d2ba 100644
> --- a/x86/unittests.cfg
> +++ b/x86/unittests.cfg
> @@ -61,6 +61,11 @@ smp = 2
>  file = smptest.flat
>  smp = 3
>  
> +[ipi_stress]
> +file = ipi_stress.flat
> +extra_params = -cpu host,-x2apic,-svm,-hypervisor -global kvm-pit.lost_tick_policy=discard -machine kernel-irqchip=on -append '0 50000'

Why add all the SVM and HLT stuff and then effectively turn them off by default?
There's basically zero chance any other configuration will get regular testing.

And why not have multi configs, e.g. to run with and without x2APIC?

> +smp = 4
> +
>  [vmexit_cpuid]
>  file = vmexit.flat
>  extra_params = -append 'cpuid'
> -- 
> 2.26.3
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow
  2022-10-20 18:01   ` Sean Christopherson
@ 2022-10-24 12:36     ` Maxim Levitsky
  2022-10-24 22:49       ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:36 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 18:01 +0000, Sean Christopherson wrote:
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > Tests that need interrupt shadow can't rely on irq_enable function anyway,
> > as its comment states,  and it is useful to know for sure that interrupts
> > are enabled after the call to this function.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  lib/x86/processor.h       | 9 ++++-----
> >  x86/apic.c                | 1 -
> >  x86/ioapic.c              | 1 -
> >  x86/svm_tests.c           | 9 ---------
> >  x86/tscdeadline_latency.c | 1 -
> >  x86/vmx_tests.c           | 7 -------
> >  6 files changed, 4 insertions(+), 24 deletions(-)
> > 
> > diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> > index 03242206..9db07346 100644
> > --- a/lib/x86/processor.h
> > +++ b/lib/x86/processor.h
> > @@ -720,13 +720,12 @@ static inline void irq_disable(void)
> >         asm volatile("cli");
> >  }
> >  
> > -/* Note that irq_enable() does not ensure an interrupt shadow due
> > - * to the vagaries of compiler optimizations.  If you need the
> > - * shadow, use a single asm with "sti" and the instruction after it.
> > - */
> >  static inline void irq_enable(void)
> >  {
> > -       asm volatile("sti");
> > +       asm volatile(
> > +                       "sti \n\t"
> 
> Formatting is odd.  Doesn't really matter, but I think this can simply be:
> 
> static inline void sti_nop(void)
> {
>         asm volatile("sti; nop");

"\n\t" is what gcc manual recommends for separating the assembly lines as you know from the gcc manual:
https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
"You may place multiple assembler instructions together in a single asm string, separated by 
the characters normally used in assembly code for the system. A combination that works in 
most places is a newline to break the line, plus a tab character to move to the instruction 
field (written as ‘\n\t’). Some assemblers allow semicolons as a line separator. 
However, note that some assembler dialects use semicolons to start a comment"

Looks like the GNU assembler does use semicolons to separate statements and '#' for comments,
but some assemblers use semicolons to start comments.

I usually use just "\n", but the safest is "\n\t".


> }
> 
> 
> > +                       "nop\n\t"
> 
> I like the idea of a helper to enable IRQs and consume pending interrupts, but I
> think we should add a new helper instead of changing irq_enable().
> 
> Hmm, or alternatively, kill off irq_enable() and irq_disable() entirely and instead
> add sti_nop().  I like this idea even better.  The helpers are all x86-specific,
> so there's no need to add a layer of abstraction, and sti() + sti_nop() has the
> benefit of making it very clear what code is being emitted without having to come
> up with clever function names.
> 
> And I think we should go even further and provide a helper to do the entire sequence
> of enable->nop->disable, which is a very common pattern.  No idea what to call
> this one, though I suppose sti_nop_cli() would work.
> 
> My vote is to replace all irq_enable() and irq_disable() usage with sti() and cli(),
> and then introduce sti_nop() and sti_nop_cli() (or whatever it gets called) and
> convert users as appropriate.

OK.

> 
> > +       );
> >  }
> >  
> >  static inline void invlpg(volatile void *va)
> > diff --git a/x86/apic.c b/x86/apic.c
> > index 23508ad5..a8964d88 100644
> > --- a/x86/apic.c
> > +++ b/x86/apic.c
> > @@ -36,7 +36,6 @@ static void __test_tsc_deadline_timer(void)
> >      irq_enable();
> >  
> >      wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC));
> > -    asm volatile ("nop");
> 
> I'm not entirely sure the existing nop is necessary here, but it's a functional
> change since it hoists the nop above the WRMSR.  To be safe, probably best to
> leave this as-is for now.

I had doubts about this; IMHO both variants are equally good, but to be safe
I'll revert this change.


> 
> >      report(tdt_count == 1, "tsc deadline timer");
> >      report(rdmsr(MSR_IA32_TSCDEADLINE) == 0, "tsc deadline timer clearing");
> >  }
> 
> ...
> 
> > diff --git a/x86/tscdeadline_latency.c b/x86/tscdeadline_latency.c
> > index a3bc4ea4..c54530dd 100644
> > --- a/x86/tscdeadline_latency.c
> > +++ b/x86/tscdeadline_latency.c
> > @@ -73,7 +73,6 @@ static void start_tsc_deadline_timer(void)
> >      irq_enable();
> >  
> >      wrmsr(MSR_IA32_TSCDEADLINE, rdmsr(MSR_IA32_TSC)+delta);
> > -    asm volatile ("nop");
> 
> Another functional change that should be skipped, at least for now.

OK.

> 


Best regards,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer
  2022-10-20 19:14   ` Sean Christopherson
@ 2022-10-24 12:37     ` Maxim Levitsky
  2022-10-24 16:10       ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:37 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 19:14 +0000, Sean Christopherson wrote:
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > Add a few functions to apic.c to make it easier to enable and disable
> > the local apic timer.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  lib/x86/apic.c | 37 +++++++++++++++++++++++++++++++++++++
> >  lib/x86/apic.h |  6 ++++++
> >  2 files changed, 43 insertions(+)
> > 
> > diff --git a/lib/x86/apic.c b/lib/x86/apic.c
> > index 5131525a..dc6d3862 100644
> > --- a/lib/x86/apic.c
> > +++ b/lib/x86/apic.c
> > @@ -256,3 +256,40 @@ void init_apic_map(void)
> >                         id_map[j++] = i;
> >         }
> >  }
> > +
> > +void apic_setup_timer(int vector, bool periodic)
> > +{
> > +       /* APIC runs with 'CPU core clock' divided by value in APIC_TDCR */
> > +
> > +       u32 lvtt = vector |
> > +                       (periodic ? APIC_LVT_TIMER_PERIODIC : APIC_LVT_TIMER_ONESHOT);
> 
> Rather than take @periodic, take the mode.  That way this funky ternary operator
> goes away and the callers are self-documenting, e.g. this
> 
>         apic_setup_timer(TIMER_VECTOR, APIC_LVT_TIMER_PERIODIC);
> 
> is more obvious than
> 
>         apic_setup_timer(TIMER_VECTOR, true);

Makes sense. I also wanted to pass the divider, but ended up hardcoding it to 1.


>         
> > +
> > +       apic_cleanup_timer();
> > +       apic_write(APIC_TDCR, APIC_TDR_DIV_1);
> > +       apic_write(APIC_LVTT, lvtt);
> > +}
> > +
> > +void apic_start_timer(u32 counter)
> > +{
> > +       apic_write(APIC_TMICT, counter);
> > +}

Makes sense.


> > +
> > +void apic_stop_timer(void)
> > +{
> > +       apic_write(APIC_TMICT, 0);
> > +}
> > +
> > +void apic_cleanup_timer(void)
> > +{
> > +       u32 lvtt = apic_read(APIC_LVTT);
> > +
> > +       // stop the counter
> > +       apic_stop_timer();
> > +
> > +       // mask the timer interrupt
> > +       apic_write(APIC_LVTT, lvtt | APIC_LVT_MASKED);
> > +
> > +       // ensure that a pending timer is serviced
> > +       irq_enable();
> 
> Jumping back to the "nop" patch, I'm reinforcing my vote to add sti_nop().  I
> actually started typing a response to say this is broken before remembering that
> a nop got added to irq_enable().

OK, although for someone who doesn't know about the interrupt shadow (I guess most of the people who will look at this code),
the above won't be confusing; in fact sti_nop() might confuse someone who doesn't know why the nop is needed.

Just a note.


Best regards,
	Maxim Levitsky

> 
> > +       irq_disable();
> > +}
> > diff --git a/lib/x86/apic.h b/lib/x86/apic.h
> > index 6d27f047..db691e2a 100644
> > --- a/lib/x86/apic.h
> > +++ b/lib/x86/apic.h
> > @@ -58,6 +58,12 @@ void disable_apic(void);
> >  void reset_apic(void);
> >  void init_apic_map(void);
> >  
> > +void apic_cleanup_timer(void);
> > +void apic_setup_timer(int vector, bool periodic);
> > +
> > +void apic_start_timer(u32 counter);
> > +void apic_stop_timer(void);
> > +
> >  /* Converts byte-addressable APIC register offset to 4-byte offset. */
> >  static inline u32 apic_reg_index(u32 reg)
> >  {
> > -- 
> > 2.26.3
> > 
> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 07/16] x86: Add a simple test for SYSENTER instruction.
  2022-10-20 19:25   ` Sean Christopherson
@ 2022-10-24 12:38     ` Maxim Levitsky
  0 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:38 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 19:25 +0000, Sean Christopherson wrote:
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > Run the test with Intel's vendor ID and in the long mode,
> > to test the emulation of this instruction on AMD.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  x86/Makefile.x86_64 |   2 +
> >  x86/sysenter.c      | 127 ++++++++++++++++++++++++++++++++++++++++++++
> >  x86/unittests.cfg   |   5 ++
> >  3 files changed, 134 insertions(+)
> >  create mode 100644 x86/sysenter.c
> > 
> > diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
> > index 865da07d..8ce53650 100644
> > --- a/x86/Makefile.x86_64
> > +++ b/x86/Makefile.x86_64
> > @@ -33,6 +33,7 @@ tests += $(TEST_DIR)/vmware_backdoors.$(exe)
> >  tests += $(TEST_DIR)/rdpru.$(exe)
> >  tests += $(TEST_DIR)/pks.$(exe)
> >  tests += $(TEST_DIR)/pmu_lbr.$(exe)
> > +tests += $(TEST_DIR)/sysenter.$(exe)
> >  
> >  
> >  ifeq ($(CONFIG_EFI),y)
> > @@ -60,3 +61,4 @@ $(TEST_DIR)/hyperv_clock.$(bin): $(TEST_DIR)/hyperv_clock.o
> >  $(TEST_DIR)/vmx.$(bin): $(TEST_DIR)/vmx_tests.o
> >  $(TEST_DIR)/svm.$(bin): $(TEST_DIR)/svm_tests.o
> >  $(TEST_DIR)/svm_npt.$(bin): $(TEST_DIR)/svm_npt.o
> > +$(TEST_DIR)/sysenter.o: CFLAGS += -Wa,-mintel64
> > diff --git a/x86/sysenter.c b/x86/sysenter.c
> > new file mode 100644
> > index 00000000..6c32fea4
> > --- /dev/null
> > +++ b/x86/sysenter.c
> > @@ -0,0 +1,127 @@
> > +#include "alloc.h"
> > +#include "libcflat.h"
> > +#include "processor.h"
> > +#include "msr.h"
> > +#include "desc.h"
> > +
> > +
> > +// undefine this to run the syscall instruction in 64 bit mode.
> > +// this won't work on AMD due to disabled code in the emulator.
> > +#define COMP32
> 
> Why not run the test in both 32-bit and 64-bit mode, and skip the 64-bit mode
> version if the vCPU model is AMD?

True, but on Intel the test won't test much since the instruction is not
emulated there.

It is also possible to enable that emulation in 64-bit mode on AMD as well;
there doesn't seem to be anything special and/or dangerous about it in the KVM emulator.

> 
> > +
> > +int main(int ac, char **av)
> > +{
> > +    extern void sysenter_target(void);
> > +    extern void test_done(void);
> 
> Tabs instead of spaces.
OK, I'll take a note.

> 
> > +
> > +    setup_vm();
> > +
> > +    int gdt_index = 0x50 >> 3;
> > +    ulong rax = 0xDEAD;
> > +
> > +    /* init the sysenter GDT block */
> > +    /*gdt64[gdt_index+0] = gdt64[KERNEL_CS >> 3];
> > +    gdt64[gdt_index+1] = gdt64[KERNEL_DS >> 3];
> > +    gdt64[gdt_index+2] = gdt64[USER_CS >> 3];
> > +    gdt64[gdt_index+3] = gdt64[USER_DS >> 3];*/
> > +
> > +    /* init the sysenter msrs*/
> > +    wrmsr(MSR_IA32_SYSENTER_CS, gdt_index << 3);
> > +    wrmsr(MSR_IA32_SYSENTER_ESP, 0xAAFFFFFFFF);
> > +    wrmsr(MSR_IA32_SYSENTER_EIP, (uint64_t)sysenter_target);
> > +
> > +    u8 *thunk = (u8*)malloc(50);
> > +    u8 *tmp = thunk;
> > +
> > +    printf("Thunk at 0x%lx\n", (u64)thunk);
> > +
> > +    /* movabs test_done, %rdx*/
> > +    *tmp++ = 0x48; *tmp++ = 0xBA;
> > +    *(u64 *)tmp = (uint64_t)test_done; tmp += 8;
> > +    /* jmp %%rdx*/
> > +    *tmp++ = 0xFF; *tmp++ = 0xe2;
> > +
> > +    asm volatile (
> 
> Can we add a helper sysenter_asm.S or whatever instead of making this a gigantic
> inline asm blob?  And then have separate routines for 32-bit vs. 64-bit?  That'd
> require a bit of code duplication, but macros could be used to dedup the common
> parts if necessary.
> 
> And with a .S file, I believe there's no need to dynamically generate the thunk,
> e.g. pass the jump target through a GPR that's not modified/used by SYSENTER.

I'll take a look; however, since I wrote this test long ago and I am kind of short on time,
I'd prefer to merge it as is and then improve it as you suggested.

Best regards,
	Maxim Levitsky

> 
> > +#ifdef COMP32
> > +        "# switch to comp32, mode prior to running the test\n"
> > +        "ljmpl *1f\n"
> > +        "1:\n"
> > +        ".long 1f\n"
> > +        ".long " xstr(KERNEL_CS32) "\n"
> > +        "1:\n"
> > +        ".code32\n"
> > +#else
> > +               "# store the 64 bit thunk address to rdx\n"
> > +               "mov %[thunk], %%rdx\n"
> > +#endif
> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test.
  2022-10-20 19:06     ` Sean Christopherson
@ 2022-10-24 12:39       ` Maxim Levitsky
  0 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:39 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 19:06 +0000, Sean Christopherson wrote:
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > On Thu, 2022-10-20 at 18:23 +0300, Maxim Levitsky wrote:
> > > +static void svm_shutdown_intercept_test(void)
> > > +{
> > > +       void* unmapped_address = alloc_vpage();
> > > +
> > > +       /*
> > > +        * Test that shutdown vm exit doesn't crash L0
> > > +        *
> > > +        * Test both native and emulated triple fault
> > > +        * (due to exception merging)
> > > +        */
> > > +
> > > +
> > > +       /*
> > > +        * This will usually cause native SVM_EXIT_SHUTDOWN
> > > +        * (KVM usually doesn't intercept #PF)
> > > +        * */
> > > +       test_set_guest(shutdown_intercept_test_guest);
> > > +       vmcb->save.idtr.base = (u64)unmapped_address;
> > > +       vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
> > > +       svm_vmrun();
> > > +       report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (BP->PF->DF->TRIPLE_FAULT) test passed");
> > > +
> > > +       /*
> > > +        * This will usually cause emulated SVM_EXIT_SHUTDOWN
> > > +        * (KVM usually intercepts #UD)
> > > +        */
> > > +       test_set_guest(shutdown_intercept_test_guest2);
> > > +       vmcb_ident(vmcb);
> > > +       vmcb->save.idtr.limit = 0;
> > > +       vmcb->control.intercept |= (1ULL << INTERCEPT_SHUTDOWN);
> > > +       svm_vmrun();
> > > +       report (vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown (UD->DF->TRIPLE_FAULT) test passed");
> > > +}
> > > +
> > >  struct svm_test svm_tests[] = {
> > >         { "null", default_supported, default_prepare,
> > >           default_prepare_gif_clear, null_test,
> > > @@ -3382,6 +3432,7 @@ struct svm_test svm_tests[] = {
> > >         TEST(svm_intr_intercept_mix_smi),
> > >         TEST(svm_tsc_scale_test),
> > >         TEST(pause_filter_test),
> > > +       TEST(svm_shutdown_intercept_test),
> > >         { NULL, NULL, NULL, NULL, NULL, NULL, NULL }
> > >  };
> > 
> > Note that on unpatched KVM, this test will cause a kernel panic on the host
> > if run.
> > 
> > I sent a patch today with a fix for this.
> 
> I'm confused.  The KVM patches address a bug where KVM screws up if the SHUTDOWN
> (or INIT) is _not_ intercepted by L1, but the test here does intercept SHUTDOWN.
> Are there more bugs lurking in KVM, or am I missing something?

Yes, you're not missing anything - it was a last-minute change that I forgot about:

If you let the shutdown not be intercepted, then even if KVM works correctly,
it will still kill qemu, and thus the other subtests of this test won't run.

The test will still 'pass' silently, which IMHO should be fixed: the test runner
should check the exit status of qemu, or in some other way detect that qemu was shut down instead
of returning normally.

I decided to do this test in selftests instead, which also has the bonus of not crashing the host kernel,
since the selftest will land after the fix.

And the above test checks it the other way around, which is still a good test IMHO
(I do need to update the commit message though).

Best regards,
	Maxim Levitsky.



> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 11/16] svm: add svm_suported
  2022-10-20 18:21   ` Sean Christopherson
@ 2022-10-24 12:40     ` Maxim Levitsky
  0 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:40 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 18:21 +0000, Sean Christopherson wrote:
> s/suported/supported
> 
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> 
> Please provide a changelog.
> 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  lib/x86/svm_lib.h | 5 +++++
> >  x86/svm.c         | 2 +-
> >  2 files changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
> > index 04910281..2d13b066 100644
> > --- a/lib/x86/svm_lib.h
> > +++ b/lib/x86/svm_lib.h
> > @@ -4,6 +4,11 @@
> >  #include <x86/svm.h>
> >  #include "processor.h"
> >  
> > +static inline bool svm_supported(void)
> > +{
> > +       return this_cpu_has(X86_FEATURE_SVM);
> 
> Why add a wrapper?  The only reason NPT and a few others have wrappers is to
> play nice with svm_test's "bool (*supported)(void)" hook.

For consistency with other code. The other way around as you suggest is also reasonable.

> 
> I would rather go the opposite direction and get rid of the wrappers, which IMO
> only make it harder to understand what is being checked.



> 
> E.g. add a required_feature to the tests and use that for all X86_FEATURE_*
> checks instead of adding wrappers.  And unless there's a supported helper I'm not
> seeing, the .supported hook can go away entirely by adding a dedicated "smp_required"
> flag.

I'd rather not add .required_feature, since tests might want to check for more than one feature.
It is better (and more visible to the user) to have the test itself check for all the features
it needs at the start.

So I'd rather remove .supported() entirely.

Best regards,
	Maxim Levitsky

> 
> We'd probably want helper macros for SMP vs. non-SMP, e.g.
> 
> #define SVM_V1_TEST(name, feature, ...)
>         { #name, feature, false, ... }
> #define SVM_SMP_V1_TEST(name, feature, ...)
>         { #name, feature, true, ... }
> 
> diff --git a/x86/svm.c b/x86/svm.c
> index 7aa3ebd2..2a412c27 100644
> --- a/x86/svm.c
> +++ b/x86/svm.c
> @@ -170,6 +170,7 @@ test_wanted(const char *name, char *filters[], int filter_count)
>  
>  int run_svm_tests(int ac, char **av, struct svm_test *svm_tests)
>  {
> +       bool smp_supported = cpu_count() > 1;
>         int i = 0;
>  
>         ac--;
> @@ -187,7 +188,10 @@ int run_svm_tests(int ac, char **av, struct svm_test *svm_tests)
>         for (; svm_tests[i].name != NULL; i++) {
>                 if (!test_wanted(svm_tests[i].name, av, ac))
>                         continue;
> -               if (svm_tests[i].supported && !svm_tests[i].supported())
> +               if (svm_tests[i].required_feature &&
> +                   !this_cpu_has(svm_tests[i].required_feature))
> +                       continue;
> +               if (svm_tests[i].smp_required && !smp_supported)
>                         continue;
>                 if (svm_tests[i].v2 == NULL) {
>                         if (svm_tests[i].on_vcpu) {
> diff --git a/x86/svm.h b/x86/svm.h
> index 0c40a086..632287ca 100644
> --- a/x86/svm.h
> +++ b/x86/svm.h
> @@ -9,7 +9,8 @@
>  
>  struct svm_test {
>         const char *name;
> -       bool (*supported)(void);
> +       u64 required_feature;
> +       bool smp_required;
>         void (*prepare)(struct svm_test *test);
>         void (*prepare_gif_clear)(struct svm_test *test);
>         void (*guest_func)(struct svm_test *test);
> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 14/16] svm: rewerite vm entry macros
  2022-10-20 18:55   ` Sean Christopherson
@ 2022-10-24 12:45     ` Maxim Levitsky
  2022-10-24 19:56       ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:45 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 18:55 +0000, Sean Christopherson wrote:
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> 
> Changelog please.  This patch in particular is extremely difficult to review
> without some explanation of what is being done, and why.
> 
> If it's not too much trouble, splitting this over multiple patches would be nice.
> 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  lib/x86/svm_lib.h | 58 +++++++++++++++++++++++++++++++++++++++
> >  x86/svm.c         | 51 ++++++++++------------------------
> >  x86/svm.h         | 70 ++---------------------------------------------
> >  x86/svm_tests.c   | 24 ++++++++++------
> >  4 files changed, 91 insertions(+), 112 deletions(-)
> > 
> > diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
> > index 27c3b137..59db26de 100644
> > --- a/lib/x86/svm_lib.h
> > +++ b/lib/x86/svm_lib.h
> > @@ -71,4 +71,62 @@ u8* svm_get_io_bitmap(void);
> >  #define MSR_BITMAP_SIZE 8192
> >  
> >  
> > +struct svm_extra_regs
> 
> Why not just svm_gprs?  This could even include RAX by grabbing it from the VMCB
> after VMRUN.

I prefer to have a single source of truth - if I grab RAX from the vmcb, then
it will have to be synced to the vmcb on each VMRUN, like KVM does,
but KVM also has a dirty-registers bitmap and such.  I prefer to keep it simple.

Plus there are also RSP and RFLAGS in the vmcb, and even RIP is to some extent a GPR.
To call this struct svm_gprs, I would have to include those there as well.
And there are also the segment registers, etc, etc.

So instead of pretending that this struct contains all the GPRs of the guest
(or of the host while the guest is running), I renamed it to state that it contains only
the GPRs that SVM doesn't context switch.

> 
> > +{
> > +    u64 rbx;
> > +    u64 rcx;
> > +    u64 rdx;
> > +    u64 rbp;
> > +    u64 rsi;
> > +    u64 rdi;
> > +    u64 r8;
> > +    u64 r9;
> > +    u64 r10;
> > +    u64 r11;
> > +    u64 r12;
> > +    u64 r13;
> > +    u64 r14;
> > +    u64 r15;
> 
> Tab instead of spaces.
> 
> > +};
> > +
> > +#define SWAP_GPRS(reg) \
> > +               "xchg %%rcx, 0x08(%%" reg ")\n\t"       \
> 
> No need for 2-tab indentation.
> 
> > +               "xchg %%rdx, 0x10(%%" reg ")\n\t"       \
> > +               "xchg %%rbp, 0x18(%%" reg ")\n\t"       \
> > +               "xchg %%rsi, 0x20(%%" reg ")\n\t"       \
> > +               "xchg %%rdi, 0x28(%%" reg ")\n\t"       \
> > +               "xchg %%r8,  0x30(%%" reg ")\n\t"       \
> > +               "xchg %%r9,  0x38(%%" reg ")\n\t"       \
> > +               "xchg %%r10, 0x40(%%" reg ")\n\t"       \
> > +               "xchg %%r11, 0x48(%%" reg ")\n\t"       \
> > +               "xchg %%r12, 0x50(%%" reg ")\n\t"       \
> > +               "xchg %%r13, 0x58(%%" reg ")\n\t"       \
> > +               "xchg %%r14, 0x60(%%" reg ")\n\t"       \
> > +               "xchg %%r15, 0x68(%%" reg ")\n\t"       \
> > +               \
> 
> Extra line.
> 
> > +               "xchg %%rbx, 0x00(%%" reg ")\n\t"       \
> 
> Why is RBX last here, but first in the struct?  Ah, because the initial swap uses
> RBX as the scratch register.  Why use RAX for the post-VMRUN swap?  AFAICT, that's
> completely arbitrary.

Let me explain:

On entry to the guest the code has to save the host GPRs and then load the guest GPRs.

Host RAX and RBX are set by gcc as I requested with the "a" and "b" constraints, but even
these should not be changed by the assembly code from the values set in the input.
(At least I haven't found a way to mark a register as both input and clobber.)

Now RAX is the hardcoded input to VMRUN, thus I leave it alone, and use RBX as the regs pointer,
which is restored to the guest value (with the host value stored in regs) at the end of SWAP_GPRS.

I could have used another GPR for the regs pointer, but not RAX, because if I were to use RAX,
I would then need to restore it to the vmcb pointer before vmrun, which would complicate the code.

That is what the kernel VMRUN code does though; however, it mostly doesn't preserve the host GPRs
itself, relying on the function ABI to preserve only the registers that the ABI says must be preserved.
IMHO all of this just doesn't matter much, as long as it works.

Now after the VMRUN, all we have is a useless RAX (it points to the VMCB) and RSP still pointing to the stack.
The guest values of these were stored to the VMCB and the host values restored from the host save area by the CPU.

So after VMRUN no register can be touched except RAX; you can't even pop values from the stack,
since that would overwrite the guest value held in the destination register.

So on exit I pop the regs pointer from the stack into RAX, use it to swap the guest and host GPRs,
and then restore RAX to its VMCB pointer value.

I hope that explains why on entry I use RBX and on exit I use RAX.
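
Spelled out, the whole sequence is roughly (pseudo-asm, just an annotation of
the macro quoted above):

	vmload  rax           ; rax = vmcb phys, rbx = regs pointer
	push    rax           ; save the vmcb pointer
	push    rbx           ; save the regs pointer
	SWAP_GPRS(rbx)        ; host GPRs -> *regs, guest GPRs loaded
	vmrun   rax
	vmsave  rax
	pop     rax           ; rax = regs pointer (the pushed rbx)
	SWAP_GPRS(rax)        ; guest GPRs -> *regs, host GPRs restored
	pop     rax           ; rax = vmcb phys again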

If I switch to full blown assembly function for this, then I could do it.

Note though that my LBR tests do still need this as a macro because they must not do
any extra jumps/calls as these clobber the LBR registers.

> 
> > +
> > +
> 
> > +#define __SVM_VMRUN(vmcb, regs, label)          \
> > +               asm volatile (                          \
> 
> Unnecessarily deep indentation.
> 
> > +                       "vmload %%rax\n\t"                  \
> > +                       "push %%rax\n\t"                    \
> > +                       "push %%rbx\n\t"                    \
> > +                       SWAP_GPRS("rbx")                    \
> > +                       ".global " label "\n\t"             \
> > +                       label ": vmrun %%rax\n\t"           \
> > +                       "vmsave %%rax\n\t"                  \
> > +                       "pop %%rax\n\t"                     \
> > +                       SWAP_GPRS("rax")                    \
> > +                       "pop %%rax\n\t"                     \
> > +                       :                                   \
> > +                       : "a" (virt_to_phys(vmcb)),         \
> > +                         "b"(regs)                         \
> > +                       /* clobbers*/                       \
> > +                       : "memory"                          \
> > +               );
> 
> If we're going to rewrite this, why not turn it into a proper assembly routine?
> E.g. the whole test_run() noinline thing just so that vmrun_rip isn't redefined
> is gross.

I had limited time working on this, but yes, it makes sense.
I'll see if I can find time to do it.


> 
> > diff --git a/x86/svm.c b/x86/svm.c
> > index 37b4cd38..9484a6d1 100644
> > --- a/x86/svm.c
> > +++ b/x86/svm.c
> > @@ -76,11 +76,11 @@ static void test_thunk(struct svm_test *test)
> >         vmmcall();
> >  }
> >  
> > -struct regs regs;
> > +struct svm_extra_regs regs;
> >  
> > -struct regs get_regs(void)
> > +struct svm_extra_regs* get_regs(void)
> >  {
> > -       return regs;
> > +       return &regs;
> 
> This isn't strictly necessary, is it?  I.e. avoiding the copy can be done in a
> separate patch, no?
Yes.
> 
> > @@ -2996,7 +2998,7 @@ static void svm_lbrv_test1(void)
> >  
> >         wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
> >         DO_BRANCH(host_branch1);
> > -       SVM_BARE_VMRUN;
> > +       SVM_VMRUN(vmcb,regs);
> 
> Space after the comma.  Multiple cases below too.
> 
> >         dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
> >  
> >         if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) {
> > @@ -3011,6 +3013,8 @@ static void svm_lbrv_test1(void)
> >  
> >  static void svm_lbrv_test2(void)
> >  {
> > +       struct svm_extra_regs* regs = get_regs();
> > +
> >         report(true, "Test that without LBRV enabled, guest LBR state does 'leak' to the host(2)");
> >  
> >         vmcb->save.rip = (ulong)svm_lbrv_test_guest2;
> > @@ -3019,7 +3023,7 @@ static void svm_lbrv_test2(void)
> >         wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
> >         DO_BRANCH(host_branch2);
> >         wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
> > -       SVM_BARE_VMRUN;
> > +       SVM_VMRUN(vmcb,regs);
> >         dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
> >         wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
> >  
> > @@ -3035,6 +3039,8 @@ static void svm_lbrv_test2(void)
> >  
> >  static void svm_lbrv_nested_test1(void)
> >  {
> > +       struct svm_extra_regs* regs = get_regs();
> > +
> >         if (!lbrv_supported()) {
> >                 report_skip("LBRV not supported in the guest");
> >                 return;
> > @@ -3047,7 +3053,7 @@ static void svm_lbrv_nested_test1(void)
> >  
> >         wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
> >         DO_BRANCH(host_branch3);
> > -       SVM_BARE_VMRUN;
> > +       SVM_VMRUN(vmcb,regs);
> >         dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
> >         wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
> >  
> > @@ -3068,6 +3074,8 @@ static void svm_lbrv_nested_test1(void)
> >  
> >  static void svm_lbrv_nested_test2(void)
> >  {
> > +       struct svm_extra_regs* regs = get_regs();
> > +
> >         if (!lbrv_supported()) {
> >                 report_skip("LBRV not supported in the guest");
> >                 return;
> > @@ -3083,7 +3091,7 @@ static void svm_lbrv_nested_test2(void)
> >  
> >         wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
> >         DO_BRANCH(host_branch4);
> > -       SVM_BARE_VMRUN;
> > +       SVM_VMRUN(vmcb,regs);
> >         dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
> >         wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
> >  
> > -- 
> > 2.26.3
> > 
> 

Best regards,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 15/16] svm: introduce svm_vcpu
  2022-10-20 19:02   ` Sean Christopherson
@ 2022-10-24 12:46     ` Maxim Levitsky
  0 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:46 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 19:02 +0000, Sean Christopherson wrote:
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > This adds minimum amout of code to support tests that
> > run SVM on more that one vCPU.
> 
> s/that/than
> 
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  lib/x86/svm_lib.c |   9 +
> >  lib/x86/svm_lib.h |  10 +
> >  x86/svm.c         |  37 ++-
> >  x86/svm.h         |   5 +-
> >  x86/svm_npt.c     |  44 ++--
> >  x86/svm_tests.c   | 615 +++++++++++++++++++++++-----------------------
> >  6 files changed, 362 insertions(+), 358 deletions(-)
> > 
> > diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
> > index 2b067c65..1152c497 100644
> > --- a/lib/x86/svm_lib.c
> > +++ b/lib/x86/svm_lib.c
> > @@ -157,3 +157,12 @@ void vmcb_ident(struct vmcb *vmcb)
> >                 ctrl->tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
> >         }
> >  }
> > +
> > +void svm_vcpu_init(struct svm_vcpu *vcpu)
> > +{
> > +       vcpu->vmcb = alloc_page();
> > +       vmcb_ident(vcpu->vmcb);
> > +       memset(&vcpu->regs, 0, sizeof(vcpu->regs));
> > +       vcpu->stack = alloc_pages(4) + (PAGE_SIZE << 4);
> > +       vcpu->vmcb->save.rsp = (ulong)(vcpu->stack);
> > +}
> > diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
> > index 59db26de..c6957dba 100644
> > --- a/lib/x86/svm_lib.h
> > +++ b/lib/x86/svm_lib.h
> > @@ -89,6 +89,16 @@ struct svm_extra_regs
> >      u64 r15;
> >  };
> >  
> > +
> > +struct svm_vcpu
> > +{
> > +       struct vmcb *vmcb;
> > +       struct svm_extra_regs regs;
> > +       void *stack;
> > +};
> > +
> > +void svm_vcpu_init(struct svm_vcpu *vcpu);
> > +
> >  #define SWAP_GPRS(reg) \
> >                 "xchg %%rcx, 0x08(%%" reg ")\n\t"       \
> >                 "xchg %%rdx, 0x10(%%" reg ")\n\t"       \
> > diff --git a/x86/svm.c b/x86/svm.c
> > index 9484a6d1..7aa3ebd2 100644
> > --- a/x86/svm.c
> > +++ b/x86/svm.c
> > @@ -16,7 +16,7 @@
> >  #include "apic.h"
> >  #include "svm_lib.h"
> >  
> > -struct vmcb *vmcb;
> > +struct svm_vcpu vcpu0;
> 
> It's not strictly vCPU0, e.g. svm_init_intercept_test() deliberately runs on
> vCPU2, presumably to avoid running on the BSP?
> 
> Since this is churning a lot of code anyways, why not clean this all up and have
> run_svm_tests() dynamically allocate state instead of relying on global data?

Makes sense.

Best regards,
	Maxim Levitsky

> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 13/16] svm: move vmcb_ident to svm_lib.c
  2022-10-20 18:37   ` Sean Christopherson
@ 2022-10-24 12:46     ` Maxim Levitsky
  0 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:46 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 18:37 +0000, Sean Christopherson wrote:
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> 
> Changelog please.  
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  lib/x86/svm_lib.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/x86/svm_lib.h |  4 ++++
> 
> What about calling these simply svm.{c,h} and renaming x86/svm.{c,h} to something
> like svm_common.{c,h}?  Long term, it would be wonderful to rid of x86/svm.{c,h}
> by genericizing the test framework, e.g. there's a ton of duplicate code between
> SVM and VMX.

Makes sense.


> 
> >  x86/svm.c         | 54 -----------------------------------------------
> >  x86/svm.h         |  1 -
> >  4 files changed, 58 insertions(+), 55 deletions(-)
> > 
> > diff --git a/lib/x86/svm_lib.c b/lib/x86/svm_lib.c
> > index 9e82e363..2b067c65 100644
> > --- a/lib/x86/svm_lib.c
> > +++ b/lib/x86/svm_lib.c
> > @@ -103,3 +103,57 @@ void setup_svm(void)
> >  
> >         setup_npt();
> >  }
> > +
> > +void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
> > +                        u64 base, u32 limit, u32 attr)
> 
> Funky indentation and wrap.

> 
> void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, u64 base, u32 limit,
>                   u32 attr)
> 
> > +{
> > +       seg->selector = selector;
> > +       seg->attrib = attr;
> > +       seg->limit = limit;
> > +       seg->base = base;
> > +}
> > +
> > +void vmcb_ident(struct vmcb *vmcb)
> > +{
> > +       u64 vmcb_phys = virt_to_phys(vmcb);
> > +       struct vmcb_save_area *save = &vmcb->save;
> > +       struct vmcb_control_area *ctrl = &vmcb->control;
> > +       u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
> 
> Ugh, a #define for '3' and '9' (in lib/x86/desc.h?) would be nice, but that can
> be left for another day/patch.
Exactly.

> 
> > +               | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
> 
> Pre-existing mess, but can you move the '|' to the previous line?  And align the
> code?
> 
> > +       u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
> > +               | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
> 
> > on the previous line.

OK.
> 
>         u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK |
>                             SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
>         u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK |
>                             SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
> 


Best regards,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 16/16] add IPI loss stress test
  2022-10-20 20:23   ` Sean Christopherson
@ 2022-10-24 12:54     ` Maxim Levitsky
  2022-10-24 17:19       ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-24 12:54 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-20 at 20:23 +0000, Sean Christopherson wrote:
> On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > +u64 num_iterations = -1;
> 
> "Run indefinitely" is an odd default.  Why not set the default number of iterations
> to something reasonable and then let the user override that if the user wants to
> run for an absurdly long time?
> 
> > +
> > +volatile u64 *isr_counts;
> > +bool use_svm;
> > +int hlt_allowed = -1;
> 
> These can all be static.
> 
> > +
> > +static int get_random(int min, int max)
> > +{
> > +       /* TODO : use rdrand to seed an PRNG instead */
> > +       u64 random_value = rdtsc() >> 4;
> > +
> > +       return min + random_value % (max - min + 1);
> > +}
> > +
> > +static void ipi_interrupt_handler(isr_regs_t *r)
> > +{
> > +       isr_counts[smp_id()]++;
> > +       eoi();
> > +}
> > +
> > +static void wait_for_ipi(volatile u64 *count)
> > +{
> > +       u64 old_count = *count;
> > +       bool use_halt;
> > +
> > +       switch (hlt_allowed) {
> > +       case -1:
> > +               use_halt = get_random(0,10000) == 0;
> 
> Randomly doing "halt" is going to be annoying to debug.  What about tying the
> this decision to the iteration and then providing a knob to let the user specify
> the frequency?  It seems unlikely that this test will expose a bug that occurs
> if and only if the halt path is truly random.

This is a stress test, it is pretty much impossible to debug, it is more like a pass/fail test.
In addition, as you can see in the switch below, hlt_allowed is a trinary value:
-1 means random, while 0/1 mean that halt is never/always used.


> 
> > +               break;
> > +       case 0:
> > +               use_halt = false;
> > +               break;
> > +       case 1:
> > +               use_halt = true;
> > +               break;
> > +       default:
> > +               use_halt = false;
> > +               break;
> > +       }
> > +
> > +       do {
> > +               if (use_halt)
> > +                       asm volatile ("sti;hlt;cli\n");
> 
> safe_halt();
OK.

> 
> > +               else
> > +                       asm volatile ("sti;nop;cli");
> 
> sti_nop_cli();
I think you mean sti_nop(); cli();


> 
> > +
> > +       } while (old_count == *count);
> 
> There's no need to loop in the use_halt case.  If KVM spuriously wakes the vCPU
> from halt, then that's a KVM bug.  Kinda ugly, but it does provide meaningfully
> coverage for the HLT case.

Nope - KVM does spuriously wake up the CPU, for example when the vCPU thread receives a signal,
or anything else that makes kvm_vcpu_check_block return -EINTR.


> 
>         if (use_halt) {
>                 safe_halt();
>                 cli();
>         } else {
>                 do {
>                         sti_nop_cli();
>                 } while (old_count == *count);
>         }
> 
>         assert(*count == old_count + 1);
> 
> > +}
> > +
> > +/******************************************************************************************************/
> > +
> > +#ifdef __x86_64__
> > +
> > +static void l2_guest_wait_for_ipi(volatile u64 *count)
> > +{
> > +       wait_for_ipi(count);
> > +       asm volatile("vmmcall");
> > +}
> > +
> > +static void l2_guest_dummy(void)
> > +{
> > +       asm volatile("vmmcall");
> > +}
> > +
> > +static void wait_for_ipi_in_l2(volatile u64 *count, struct svm_vcpu *vcpu)
> > +{
> > +       u64 old_count = *count;
> > +       bool irq_on_vmentry = get_random(0,1) == 0;
> 
> Same concerns about using random numbers.

I can also add a parameter to force this to true/false, or, better long term,
provide a PRNG and just seed it with either RDRAND or a userspace-given number.
The RDRAND-retrieved value can even be printed so that the test can be replayed.

You know just like the tools we both worked on at Intel did....

In fact I'll just do it - just need to pick some open source PRNG code.
Do you happen to know a good one? Mersenne Twister? 

> 
> > +
> > +       vcpu->vmcb->save.rip = (ulong)l2_guest_wait_for_ipi;
> > +       vcpu->regs.rdi = (u64)count;
> > +
> > +       vcpu->vmcb->save.rip = irq_on_vmentry ? (ulong)l2_guest_dummy : (ulong)l2_guest_wait_for_ipi;
> > +
> > +       do {
> > +               if (irq_on_vmentry)
> > +                       vcpu->vmcb->save.rflags |= X86_EFLAGS_IF;
> > +               else
> > +                       vcpu->vmcb->save.rflags &= ~X86_EFLAGS_IF;
> > +
> > +               asm volatile("clgi;nop;sti");
> 
> Why a NOP between CLGI and STI?  And why re-enable GIF on each iteration?

It's a leftover from the days when I was too lazy to check which instructions have an interrupt window.
Also still using a semicolon separator here. I'll fix this.


> 
> > +               // GIF is set by VMRUN
> > +               SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
> > +               // GIF is cleared by VMEXIT
> > +               asm volatile("cli;nop;stgi");
> 
> Why re-enable GIF on every exit?

And why not? KVM does this on each VMRUN.

> 
> > +
> > +               assert(vcpu->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
> > +
> > +       } while (old_count == *count);
> 
> Isn't the loop only necessary in the irq_on_vmentry case?

Yes it is - the interrupts come from a different vCPU, so entering
the guest with IF set doesn't guarantee that it will get an
interrupt instantly, but the other way around is true:
with IF clear it will always get the interrupt only after it sets IF later
in wait_for_ipi().

I need to rename irq_on_vmentry to IF_set_on_vmentry, or something.



> 
> static void run_svm_l2(...)
> {
>         SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
>         assert(vcpu->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
> }
> 
> E.g. can't this be:
> 
>         bool irq_on_vmentry = ???;
>         u64 old_count = *count;
> 
>         clgi();
>         sti();
> 
>         vcpu->regs.rdi = (u64)count;
> 
>         if (!irq_on_vmentry) {
>                 vcpu->vmcb->save.rip = (ulong)l2_guest_wait_for_ipi;
>                 vcpu->vmcb->save.rflags &= ~X86_EFLAGS_IF;
>                 run_svm_l2(...);
>         } else {
>                 vcpu->vmcb->save.rip = (ulong)l2_guest_dummy
>                 vcpu->vmcb->save.rflags |= X86_EFLAGS_IF;
>                 do {
>                         run_svm_l2(...);
>                 } while (old_count == *count);
>         }
> 
>         assert(*count == old_count + 1);
>         cli();
>         stgi();
> 
> > +}
> > +#endif
> > +
> > +/******************************************************************************************************/
> > +
> > +#define FIRST_TEST_VCPU 1
> > +
> > +static void vcpu_init(void *data)
> > +{
> > +       /* To make it easier to see iteration number in the trace */
> > +       handle_irq(0x40, ipi_interrupt_handler);
> > +       handle_irq(0x50, ipi_interrupt_handler);
> 
> Why not make it even more granular?  E.g. do vector == 32 + (iteration % ???)
> Regardless, a #define for the (base) vector would be helpful, the usage in
> vcpu_code() is a bit magical.

Don't see why not, but usually two vectors is enough. I can replace the magic
numbers with #defines.

> 
> 
> > +}
> > +
> > +static void vcpu_code(void *data)
> > +{
> > +       int ncpus = cpu_count();
> > +       int cpu = (long)data;
> > +#ifdef __x86_64__
> > +       struct svm_vcpu vcpu;
> > +#endif
> > +
> > +       u64 i;
> > +
> > +#ifdef __x86_64__
> > +       if (cpu == 2 && use_svm)
> 
> Why only CPU2?

A leftover from the days when I had no code to run multiple guests.



> 
> > +               svm_vcpu_init(&vcpu);
> > +#endif
> > +
> > +       assert(cpu != 0);
> > +
> > +       if (cpu != FIRST_TEST_VCPU)
> > +               wait_for_ipi(&isr_counts[cpu]);
> > +
> > +       for (i = 0; i < num_iterations; i++)
> > +       {
> > +               u8 physical_dst = cpu == ncpus -1 ? 1 : cpu + 1;
> 
> Space after the '-'.
OK.

> 
> > +
> > +               // send IPI to a next vCPU in a circular fashion
> > +               apic_icr_write(APIC_INT_ASSERT |
> > +                               APIC_DEST_PHYSICAL |
> > +                               APIC_DM_FIXED |
> > +                               (i % 2 ? 0x40 : 0x50),
> > +                               physical_dst);
> > +
> > +               if (i == (num_iterations - 1) && cpu != FIRST_TEST_VCPU)
> > +                       break;
> > +
> > +#ifdef __x86_64__
> > +               // wait for the IPI interrupt chain to come back to us
> > +               if (cpu == 2 && use_svm) {
> > +                               wait_for_ipi_in_l2(&isr_counts[cpu], &vcpu);
> 
> Indentation is funky.
OK.
> 
> > +                               continue;
> > +               }
> > +#endif
> > +               wait_for_ipi(&isr_counts[cpu]);
> > +       }
> > +}
> > +
> > +int main(int argc, void** argv)
> > +{
> > +       int cpu, ncpus = cpu_count();
> > +
> > +       assert(ncpus > 2);
> > +
> > +       if (argc > 1)
> > +               hlt_allowed = atol(argv[1]);
> > +
> > +       if (argc > 2)
> > +               num_iterations = atol(argv[2]);
> > +
> > +       setup_vm();
> > +
> > +#ifdef __x86_64__
> > +       if (svm_supported()) {
> > +               use_svm = true;
> > +               setup_svm();
> > +       }
> > +#endif
> > +
> > +       isr_counts = (volatile u64 *)calloc(ncpus, sizeof(u64));
> > +
> > +       printf("found %d cpus\n", ncpus);
> > +       printf("running for %lld iterations - test\n",
> > +               (long long unsigned int)num_iterations);
> > +
> > +
> > +       for (cpu = 0; cpu < ncpus; ++cpu)
> > +               on_cpu_async(cpu, vcpu_init, (void *)(long)cpu);
> > +
> > +       /* now let all the vCPUs end the IPI function*/
> > +       while (cpus_active() > 1)
> > +                 pause();
> > +
> > +       printf("starting test on all cpus but 0...\n");
> > +
> > +       for (cpu = ncpus-1; cpu >= FIRST_TEST_VCPU; cpu--)
> 
> Spaces around the '-'.

I will find a way to run checkpatch.pl on the patches...

> 
> > +               on_cpu_async(cpu, vcpu_code, (void *)(long)cpu);
> 
> Why not use smp_id() in vcpu_code()?  ipi_interrupt_handler() already relies on
> that being correct.
> 
> > +
> > +       printf("test started, waiting to end...\n");
> > +
> > +       while (cpus_active() > 1) {
> > +
> > +               unsigned long isr_count1, isr_count2;
> > +
> > +               isr_count1 = isr_counts[1];
> > +               delay(5ULL*1000*1000*1000);
> 
> Please add a macro or two for nanoseconds/milliseconds/seconds or whatever this
> expands to.

That is the problem - the delay is just in TSC frequency units, and for some reason
knowing the TSC frequency on x86 is next to impossible on AMD

(If someone from AMD listens, please add a CPUID for this!)


> 
> > +               isr_count2 = isr_counts[1];
> > +
> > +               if (isr_count1 == isr_count2) {
> > +                       printf("\n");
> > +                       printf("hang detected!!\n");
> > +                       break;
> > +               } else {
> > +                       printf("made %ld IPIs \n", (isr_count2 - isr_count1)*(ncpus-1));
> > +               }
> > +       }
> > +
> > +       printf("\n");
> > +
> > +       for (cpu = 1; cpu < ncpus; ++cpu)
> > +               report(isr_counts[cpu] == num_iterations,
> > +                               "Number of IPIs match (%lld)",
> 
> Indentation.
> 
> > +                               (long long unsigned int)isr_counts[cpu]);
> 
> Print num_iterations, i.e. expected vs. actual?
> 
> > +
> > +       free((void*)isr_counts);
> > +       return report_summary();
> > +}
> > diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> > index ebb3fdfc..7655d2ba 100644
> > --- a/x86/unittests.cfg
> > +++ b/x86/unittests.cfg
> > @@ -61,6 +61,11 @@ smp = 2
> >  file = smptest.flat
> >  smp = 3
> >  
> > +[ipi_stress]
> > +file = ipi_stress.flat
> > +extra_params = -cpu host,-x2apic,-svm,-hypervisor -global kvm-pit.lost_tick_policy=discard -machine kernel-irqchip=on -append '0 50000'
> 
> Why add all the SVM and HLT stuff and then effectively turn them off by default?
> There's basically zero chance any other configuration will get regular testing.

This is because this is a stress test and it is mostly useful to run manually for some time.
The svm and hlt options should be enabled though; that's a leftover from the fact that I almost never run the test
from the kvm unit tests main script. I'll enable these.




> 
> And why not have multi configs, e.g. to run with and without x2APIC?

Good idea as well, although I don't know if I want to slow down the kvm unit tests run too much.
No x2apic is mostly because AVIC used to not work when x2APIC was enabled; I can drop it now.
'-hypervisor' is also some leftover, I don't know why it is there.


Best regards,
	Maxim Levitsky

> 
> > +smp = 4
> > +
> >  [vmexit_cpuid]
> >  file = vmexit.flat
> >  extra_params = -append 'cpuid'
> > -- 
> > 2.26.3
> > 
> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer
  2022-10-24 12:37     ` Maxim Levitsky
@ 2022-10-24 16:10       ` Sean Christopherson
  2022-10-27 10:19         ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-24 16:10 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> On Thu, 2022-10-20 at 19:14 +0000, Sean Christopherson wrote:
> > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > +       // ensure that a pending timer is serviced
> > > +       irq_enable();
> > 
> > Jumping back to the "nop" patch, I'm reinforcing my vote to add sti_nop().  I
> > actually starting typing a response to say this is broken before remembering that
> > a nop got added to irq_enable().
> 
OK, although for someone who doesn't know about the interrupt shadow (I
guess most of the people who will look at this code), the above won't
confuse them; in fact sti_nop() might confuse someone who doesn't know
why this nop is needed.

The difference is that sti_nop() might leave unfamiliar readers asking "why", but
it won't actively mislead them.  And the "why" can be easily answered by a comment
above sti_nop() to describe its purpose.  A "see also safe_halt()" with a comment
there would be extra helpful, as "safe halt" is the main reason the STI shadow is
even a thing.

On the other hand, shoving a NOP into irq_enable() is pretty much guaranteed to
cause problems for readers that do know about STI shadows since there's nothing
in the name "irq_enable" that suggests that the helper also intentionally eats the
interrupt shadow, and especially because the kernel's local_irq_enable() distills
down to a bare STI.
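
E.g. something like this (completely untested, just to illustrate the naming
and the comment):

	/*
	 * Enable IRQs and eat the STI shadow, i.e. ensure that a pending IRQ
	 * is delivered before the next instruction executes.  See also
	 * safe_halt(), which relies on the shadow to avoid missing the wake
	 * event.
	 */
	static inline void sti_nop(void)
	{
		asm volatile("sti; nop");
	}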

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 16/16] add IPI loss stress test
  2022-10-24 12:54     ` Maxim Levitsky
@ 2022-10-24 17:19       ` Sean Christopherson
  2022-10-27 11:00         ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-24 17:19 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> On Thu, 2022-10-20 at 20:23 +0000, Sean Christopherson wrote:
> > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > +static void wait_for_ipi(volatile u64 *count)
> > > +{
> > > +       u64 old_count = *count;
> > > +       bool use_halt;
> > > +
> > > +       switch (hlt_allowed) {
> > > +       case -1:
> > > +               use_halt = get_random(0,10000) == 0;
> > 
> > Randomly doing "halt" is going to be annoying to debug.  What about tying the
> > this decision to the iteration and then providing a knob to let the user specify
> > the frequency?  It seems unlikely that this test will expose a bug that occurs
> > if and only if the halt path is truly random.
> 
> This is stress test, it is pretty much impossible to debug, it is more like
> pass/fail test.

There's a big difference between "hard to debug because there's a lot going on"
and "hard to debug because failures are intermittent due to use of random numbers
with no way to ensure a deterministic sequence.  I completely understand that this
type of test is going to be really hard to debug, but that's argument for making
the test as deterministic as possible, i.e. do what we can to make it slightly
less awful to debug.

> > > +                       asm volatile ("sti;nop;cli");
> > 
> > sti_nop_cli();
> I think you mean sti_nop(); cli();

I was thinking we could add another helper since it's such a common pattern.

> > > +
> > > +       } while (old_count == *count);
> > 
> > There's no need to loop in the use_halt case.  If KVM spuriously wakes the vCPU
> > from halt, then that's a KVM bug.  Kinda ugly, but it does provide meaningfully
> > coverage for the HLT case.
> 
> Nope - KVM does spuriously wake up the CPU, for example when the vCPU thread
> recieves a signal and anything else that makes the kvm_vcpu_check_block
> return -EINTR.

That doesn't (and shouldn't) wake the vCPU from the guest's perspective.  If/when
userspace calls KVM_RUN again, the vCPU's state should still be KVM_MP_STATE_HALTED
and thus KVM will invoke vcpu_block() until there is an actual wake event.

This is something that KVM _must_ get correct,

> > > +static void wait_for_ipi_in_l2(volatile u64 *count, struct svm_vcpu *vcpu)
> > > +{
> > > +       u64 old_count = *count;
> > > +       bool irq_on_vmentry = get_random(0,1) == 0;
> > 
> > Same concerns about using random numbers.
> 
> I can also add a parameter to force this to true/false, or better long term,
> is to provide a PRNG and just seed it with either RDRAND or a userspace given number.
> RDRAND retrived value can be even printed so that the test can be replayed.
> 
> You know just like the tools we both worked on at Intel did....
> 
> In fact I'll just do it - just need to pick some open source PRNG code.
> Do you happen to know a good one? Mersenne Twister? 

It probably makes sense to use whatever we end up using for selftests[*] in order
to minimize the code we have to maintain.

[*] https://lore.kernel.org/all/20221019221321.3033920-2-coltonlewis@google.com

> > > +               // GIF is set by VMRUN
> > > +               SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
> > > +               // GIF is cleared by VMEXIT
> > > +               asm volatile("cli;nop;stgi");
> > 
> > Why re-enable GIF on every exit?
> 
> And why not? KVM does this on each VMRUN.

Because doing work for no discernible reason is confusing.  E.g. if this were a
"real" hypervisor, it should also context switch CR2.

KVM enables STGI because GIF=0 blocks _all_ interrupts, i.e. KVM needs to recognize
NMI, SMI, #MC, etc... asap and even if KVM stays in its tight run loop.  For KUT,
there should be never be an NMI, SMI, #MC, etc... and so no need to enable GIF.

I suppose you could make the argument that the test should set GIF when running on
bare metal, but that's tenuous at best as SMI is the only event that isn't fatal to
the test.

> > > +
> > > +       printf("test started, waiting to end...\n");
> > > +
> > > +       while (cpus_active() > 1) {
> > > +
> > > +               unsigned long isr_count1, isr_count2;
> > > +
> > > +               isr_count1 = isr_counts[1];
> > > +               delay(5ULL*1000*1000*1000);
> > 
> > Please add a macro or two for nanoseconds/milliseconds/seconds or whatever this
> > expands to.
> 
> That is the problem - the delay is just in TSC freq units, and knowing TSC freq
> for some reason on x86 is next to impossible on AMD

Ah, delay() takes the number of cycles.  Ugh.

We should fix that, e.g. use the CPUID-provided frequency when possible (KVM should
emulate this if it doesn't already), and then #define an arbitrary TSC frequency as
a fall back so that we can write readable code, e.g. 2.4Ghz is probably close enough
to work.
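
Roughly (untested sketch; "tsc_freq_hz" is a made-up global that would be
filled from CPUID when available):

	/* Hypothetical fallback when the TSC frequency can't be discovered. */
	#define FALLBACK_TSC_FREQ_HZ	(2400ull * 1000 * 1000)

	static inline u64 nsec_to_cycles(u64 nsec)
	{
		u64 freq = tsc_freq_hz ? tsc_freq_hz : FALLBACK_TSC_FREQ_HZ;

		/* Good enough for test delays; rounding doesn't matter. */
		return nsec * freq / 1000000000ull;
	}

	static inline void delay_ns(u64 nsec)
	{
		delay(nsec_to_cycles(nsec));
	}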

> > And why not have multi configs, e.g. to run with and without x2APIC?
> 
> Good idea as well, although I don't know if I want to slow down the kvm unit
> tests run too much.

We should add a way to flag and omit all "slow" tests, e.g. vmx_vmcs_shadow_test
takes an absurd amount of time and is uninteresting for the vast majority of changes.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 14/16] svm: rewerite vm entry macros
  2022-10-24 12:45     ` Maxim Levitsky
@ 2022-10-24 19:56       ` Sean Christopherson
  2022-10-27 12:07         ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-24 19:56 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> On Thu, 2022-10-20 at 18:55 +0000, Sean Christopherson wrote:
> > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > 
> > Changelog please.  This patch in particular is extremely difficult to review
> > without some explanation of what is being done, and why.
> > 
> > If it's not too much trouble, splitting this over multiple patches would be nice.
> > 
> > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > ---
> > >  lib/x86/svm_lib.h | 58 +++++++++++++++++++++++++++++++++++++++
> > >  x86/svm.c         | 51 ++++++++++------------------------
> > >  x86/svm.h         | 70 ++---------------------------------------------
> > >  x86/svm_tests.c   | 24 ++++++++++------
> > >  4 files changed, 91 insertions(+), 112 deletions(-)
> > > 
> > > diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
> > > index 27c3b137..59db26de 100644
> > > --- a/lib/x86/svm_lib.h
> > > +++ b/lib/x86/svm_lib.h
> > > @@ -71,4 +71,62 @@ u8* svm_get_io_bitmap(void);
> > >  #define MSR_BITMAP_SIZE 8192
> > >  
> > >  
> > > +struct svm_extra_regs
> > 
> > Why not just svm_gprs?  This could even include RAX by grabbing it from the VMCB
> > after VMRUN.
> 
> I prefer to have a single source of truth - if I grab it from vmcb, then
> it will have to be synced to vmcb on each vmrun, like the KVM does,
> but it also has dirty registers bitmap and such.

KUT doesn't need a dirty registers bitmap.  That's purely a performance optimization
for VMX so that KVM can avoid unnecessary VMWRITEs for RIP and RSP.  E.g. SVM
ignores the dirty bitmap entirely:

  static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
  {
	struct vcpu_svm *svm = to_svm(vcpu);

	trace_kvm_entry(vcpu);

	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];

	...

  }

And even for VMX, I can't imagine a nVMX test will ever be so performance
sensitive that an extra VMWRITE for RSP will be a problem.

> I prefer to keep it simple.

The issue is that simplifying the assembly code increases the complexity for the users.
E.g. users and readers need to understand what "extra regs" means, which means documenting
what is included and what's not.  On the other hand, the assembly is already quite
complex; adding a few lines to swap RAX and RSP doesn't really change the overall
complexity of that low level code.

The other bit of complexity is that if a test wants to access all GPRs, it needs
both this struct and the VMCB.  RSP is unlikely to be problematic, but I can see
guest.RAX being something a test wants access to.

> Plus there is also RSP in vmcb, and RFLAGS, and even RIP to some extent is a GPR.

RIP is definitely not a GPR, it has no assigned index.  RFLAGS is also not a GPR.

> To call this struct svm_gprs, I would have to include them there as well.

RAX and RSP are the only GPRs that need to be moved to/from the VMCB.  

> And also there is segment registers, etc, etc.

Which aren't GPRs.

> So instead of pretending that this struct contains all the GPRs of the guest
> (or host while guest is running) I renamed it to state that it contains only
> some gprs that SVM doesn't context switch.

...

> > > +               "xchg %%rdx, 0x10(%%" reg ")\n\t"       \
> > > +               "xchg %%rbp, 0x18(%%" reg ")\n\t"       \
> > > +               "xchg %%rsi, 0x20(%%" reg ")\n\t"       \
> > > +               "xchg %%rdi, 0x28(%%" reg ")\n\t"       \
> > > +               "xchg %%r8,  0x30(%%" reg ")\n\t"       \
> > > +               "xchg %%r9,  0x38(%%" reg ")\n\t"       \
> > > +               "xchg %%r10, 0x40(%%" reg ")\n\t"       \
> > > +               "xchg %%r11, 0x48(%%" reg ")\n\t"       \
> > > +               "xchg %%r12, 0x50(%%" reg ")\n\t"       \
> > > +               "xchg %%r13, 0x58(%%" reg ")\n\t"       \
> > > +               "xchg %%r14, 0x60(%%" reg ")\n\t"       \
> > > +               "xchg %%r15, 0x68(%%" reg ")\n\t"       \
> > > +               \
> > 
> > Extra line.
> > 
> > > +               "xchg %%rbx, 0x00(%%" reg ")\n\t"       \
> > 
> > Why is RBX last here, but first in the struct?  Ah, because the initial swap uses
> > RBX as the scratch register.  Why use RAX for the post-VMRUN swap?  AFAICT, that's
> > completely arbitrary.
> 
> Let me explain:
> 
> On entry to the guest the code has to save the host GPRs and then load the guest GPRs.
> 
> Host RAX and RBX are set by the gcc as I requested with "a" and "b"
> modifiers, but even these should not be changed by the assembly code from the
> values set in the input.
> (At least I haven't found a way to mark a register as both input and clobber)

The way to achieve input+clobber is to use input+output, i.e. "+b" (regs), but I
think that's a moot point...
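
I.e. something like (illustration only):

	asm volatile("..."
		     : "+b"(regs)               /* RBX: input *and* output */
		     : "a"(virt_to_phys(vmcb))
		     : "memory");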

> Now RAX is the hardcoded input to VMRUN, thus I leave it alone, and use RBX
> as regs pointer, which is restored to the guest value (and host value stored
> in the regs) at the end of SWAP_GPRs.

...because SWAP_GPRs isn't the end of the asm blob.  As long as RBX holds the
same value (regs) at the end of the asm blob, no clobbering is necessary even if
RBX is changed within the blob.

> If I switch to full blown assembly function for this, then I could do it.
> 
> Note though that my LBR tests do still need this as a macro because they must
> not do any extra jumps/calls as these clobber the LBR registers.

Shouldn't it be fairly easy to account for the CALL in the asm routine?  Taking
on that sort of dependency is quite gross, but it'd likely be less maintenance
in the long run than an inline asm blob.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow
  2022-10-24 12:36     ` Maxim Levitsky
@ 2022-10-24 22:49       ` Sean Christopherson
  2022-10-27 10:16         ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-24 22:49 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> On Thu, 2022-10-20 at 18:01 +0000, Sean Christopherson wrote:
> > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > Tests that need interrupt shadow can't rely on irq_enable function anyway,
> > > as its comment states,  and it is useful to know for sure that interrupts
> > > are enabled after the call to this function.
> > > 
> > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > ---
> > >  lib/x86/processor.h       | 9 ++++-----
> > >  x86/apic.c                | 1 -
> > >  x86/ioapic.c              | 1 -
> > >  x86/svm_tests.c           | 9 ---------
> > >  x86/tscdeadline_latency.c | 1 -
> > >  x86/vmx_tests.c           | 7 -------
> > >  6 files changed, 4 insertions(+), 24 deletions(-)
> > > 
> > > diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> > > index 03242206..9db07346 100644
> > > --- a/lib/x86/processor.h
> > > +++ b/lib/x86/processor.h
> > > @@ -720,13 +720,12 @@ static inline void irq_disable(void)
> > >         asm volatile("cli");
> > >  }
> > >  
> > > -/* Note that irq_enable() does not ensure an interrupt shadow due
> > > - * to the vagaries of compiler optimizations.  If you need the
> > > - * shadow, use a single asm with "sti" and the instruction after it.
> > > - */
> > >  static inline void irq_enable(void)
> > >  {
> > > -       asm volatile("sti");
> > > +       asm volatile(
> > > +                       "sti \n\t"
> > 
> > Formatting is odd.  Doesn't really matter, but I think this can simply be:
> > 
> > static inline void sti_nop(void)
> > {
> >         asm volatile("sti; nop");
> 
> "\n\t" is what gcc manual recommends for separating the assembly lines as you
> know from the gcc manual:
> https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html "You may place multiple
> assembler instructions together in a single asm string, separated by  the
> characters normally used in assembly code for the system. A combination that
> works in  most places is a newline to break the line, plus a tab character to
> move to the instruction  field (written as ‘\n\t’). Some assemblers allow
> semicolons as a line separator.  However, note that some assembler dialects
> use semicolons to start a comment"
> 
> Looks like gnu assembler does use semicolon for new statements and hash for comments 
> but some assemblers do semicolon for comments.
> 
> I usually use just "\n", but the safest is "\n\t".

I'm pretty sure we can ignore GCC's warning here and maximize readability.  There
are already plenty of asm blobs that use a semicolon.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow
  2022-10-24 22:49       ` Sean Christopherson
@ 2022-10-27 10:16         ` Maxim Levitsky
  2022-10-27 15:50           ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-27 10:16 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Mon, 2022-10-24 at 22:49 +0000, Sean Christopherson wrote:
> On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> > On Thu, 2022-10-20 at 18:01 +0000, Sean Christopherson wrote:
> > > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > > Tests that need interrupt shadow can't rely on irq_enable function anyway,
> > > > as its comment states,  and it is useful to know for sure that interrupts
> > > > are enabled after the call to this function.
> > > > 
> > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > > ---
> > > >  lib/x86/processor.h       | 9 ++++-----
> > > >  x86/apic.c                | 1 -
> > > >  x86/ioapic.c              | 1 -
> > > >  x86/svm_tests.c           | 9 ---------
> > > >  x86/tscdeadline_latency.c | 1 -
> > > >  x86/vmx_tests.c           | 7 -------
> > > >  6 files changed, 4 insertions(+), 24 deletions(-)
> > > > 
> > > > diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> > > > index 03242206..9db07346 100644
> > > > --- a/lib/x86/processor.h
> > > > +++ b/lib/x86/processor.h
> > > > @@ -720,13 +720,12 @@ static inline void irq_disable(void)
> > > >         asm volatile("cli");
> > > >  }
> > > >  
> > > > -/* Note that irq_enable() does not ensure an interrupt shadow due
> > > > - * to the vagaries of compiler optimizations.  If you need the
> > > > - * shadow, use a single asm with "sti" and the instruction after it.
> > > > - */
> > > >  static inline void irq_enable(void)
> > > >  {
> > > > -       asm volatile("sti");
> > > > +       asm volatile(
> > > > +                       "sti \n\t"
> > > 
> > > Formatting is odd.  Doesn't really matter, but I think this can simply be:
> > > 
> > > static inline void sti_nop(void)
> > > {
> > >         asm volatile("sti; nop");
> > 
> > "\n\t" is what gcc manual recommends for separating the assembly lines as you
> > know from the gcc manual:
> > https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html "You may place multiple
> > assembler instructions together in a single asm string, separated by  the
> > characters normally used in assembly code for the system. A combination that
> > works in  most places is a newline to break the line, plus a tab character to
> > move to the instruction  field (written as ‘\n\t’). Some assemblers allow
> > semicolons as a line separator.  However, note that some assembler dialects
> > use semicolons to start a comment"
> > 
> > Looks like gnu assembler does use semicolon for new statements and hash for comments 
> > but some assemblers do semicolon for comments.
> > 
> > I usually use just "\n", but the safest is "\n\t".
> 
> I'm pretty sure we can ignore GCC's warning here and maximize readability.  There
> are already plenty of asm blobs that use a semicolon.

IMHO this is corner cutting and you yourself said that this is wrong.

The other instances which use a semicolon should be fixed IMHO.

Best regards,
	Maxim Levitsky


> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer
  2022-10-24 16:10       ` Sean Christopherson
@ 2022-10-27 10:19         ` Maxim Levitsky
  2022-10-27 15:54           ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-27 10:19 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Mon, 2022-10-24 at 16:10 +0000, Sean Christopherson wrote:
> On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> > On Thu, 2022-10-20 at 19:14 +0000, Sean Christopherson wrote:
> > > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > > +       // ensure that a pending timer is serviced
> > > > +       irq_enable();
> > > 
> > > Jumping back to the "nop" patch, I'm reinforcing my vote to add sti_nop().  I
> > > actually starting typing a response to say this is broken before remembering that
> > > a nop got added to irq_enable().
> > 
> > OK, although, for someone that doesn't know about the interrupt shadow (I
> > guess most of the people that will look at this code), the above won't
> > confuse them, in fact sti_nop() might confuse someone who doesn't know about
> > why this nop is needed.
> 
> The difference is that sti_nop() might leave unfamiliar readers asking "why", but
> it won't actively mislead them.  And the "why" can be easily answered by a comment
> above sti_nop() to describe its purpose.  A "see also safe_halt()" with a comment
> there would be extra helpful, as "safe halt" is the main reason the STI shadow is
> even a thing.
> 
> On the other hand, shoving a NOP into irq_enable() is pretty much guaranteed to
> cause problems for readers that do know about STI shadows since there's nothing
> in the name "irq_enable" that suggests that the helper also intentionally eats the
> interrupt shadow, and especically because the kernel's local_irq_enable() distills
> down to a bare STI.

I still don't agree with you on this at all. I would like to hear what other KVM developers
think about it.

safe_halt actually is an example of a function that abstracts away the nop - just what I want to do.

A comment in irq_enable() about that nop also is fine to have.

Best regards,
	Maxim Levitsky


> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 16/16] add IPI loss stress test
  2022-10-24 17:19       ` Sean Christopherson
@ 2022-10-27 11:00         ` Maxim Levitsky
  2022-10-27 18:41           ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-27 11:00 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Mon, 2022-10-24 at 17:19 +0000, Sean Christopherson wrote:
> On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> > On Thu, 2022-10-20 at 20:23 +0000, Sean Christopherson wrote:
> > > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > > +static void wait_for_ipi(volatile u64 *count)
> > > > +{
> > > > +       u64 old_count = *count;
> > > > +       bool use_halt;
> > > > +
> > > > +       switch (hlt_allowed) {
> > > > +       case -1:
> > > > +               use_halt = get_random(0,10000) == 0;
> > > 
> > > Randomly doing "halt" is going to be annoying to debug.  What about tying the
> > > this decision to the iteration and then providing a knob to let the user specify
> > > the frequency?  It seems unlikely that this test will expose a bug that occurs
> > > if and only if the halt path is truly random.
> > 
> > This is stress test, it is pretty much impossible to debug, it is more like
> > pass/fail test.
> 
> There's a big difference between "hard to debug because there's a lot going on"
> and "hard to debug because failures are intermittent due to use of random numbers
> with no way to ensure a deterministic sequence.  I completely understand that this
> type of test is going to be really hard to debug, but that's argument for making
> the test as deterministic as possible, i.e. do what we can to make it slightly
> less awful to debug.

I agree with you mostly, but I think that using a PRNG and a seed is the best way
to achieve both randomness and determinism at the same time.
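
Even something as simple as splitmix64 would probably be enough (just a
sketch, not a proposal for the final generator):

	/* splitmix64-style generator: tiny, seedable, fine for a stress test. */
	static u64 prng_state;

	static void prng_seed(u64 seed)
	{
		prng_state = seed;
	}

	static u64 prng_next(void)
	{
		u64 z = (prng_state += 0x9e3779b97f4a7c15ull);

		z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ull;
		z = (z ^ (z >> 27)) * 0x94d049bb133111ebull;
		return z ^ (z >> 31);
	}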

> 
> > > > +                       asm volatile ("sti;nop;cli");
> > > 
> > > sti_nop_cli();
> > I think you mean sti_nop(); cli();
> 
> I was thinking we could add another helper since it's such a common pattern.

This is a good overall idea.

I would call it process_pending_interrupts(), with a comment that it
enables interrupts just long enough for a single pending interrupt to be serviced.

And BTW I also do use that semicolon, because I either forgot about the gcc rule
or I didn't know about it. I'll fix it.
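
I.e. something like (the name and comment are just a first stab):

	/*
	 * Enable interrupts for exactly one instruction so that a single
	 * pending interrupt (if any) can be serviced, then disable them
	 * again.
	 */
	static inline void process_pending_interrupts(void)
	{
		asm volatile("sti; nop; cli");
	}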


> 
> > > > +
> > > > +       } while (old_count == *count);
> > > 
> > > There's no need to loop in the use_halt case.  If KVM spuriously wakes the vCPU
> > > from halt, then that's a KVM bug.  Kinda ugly, but it does provide meaningfully
> > > coverage for the HLT case.
> > 
> > Nope - KVM does spuriously wake up the CPU, for example when the vCPU thread
> > recieves a signal and anything else that makes the kvm_vcpu_check_block
> > return -EINTR.
> 
> That doesn't (and shouldn't) wake the vCPU from the guest's perspective.  If/when
> userspace calls KVM_RUN again, the vCPU's state should still be KVM_MP_STATE_HALTED
> and thus KVM will invoke vcpu_block() until there is an actual wake event.

Well, HLT is allowed to do spurious wakeups, so KVM is allowed to not do it correctly;
I thought that KVM doesn't do this correctly, but I'm glad to hear that it does.
Thanks for the explanation!

I'll test if my test passes if I remove the loop in the halt case.

> 
> This is something that KVM _must_ get correct,
> 
> > > > +static void wait_for_ipi_in_l2(volatile u64 *count, struct svm_vcpu *vcpu)
> > > > +{
> > > > +       u64 old_count = *count;
> > > > +       bool irq_on_vmentry = get_random(0,1) == 0;
> > > 
> > > Same concerns about using random numbers.
> > 
> > I can also add a parameter to force this to true/false, or better long term,
> > is to provide a PRNG and just seed it with either RDRAND or a userspace given number.
> > RDRAND retrived value can be even printed so that the test can be replayed.
> > 
> > You know just like the tools we both worked on at Intel did....
> > 
> > In fact I'll just do it - just need to pick some open source PRNG code.
> > Do you happen to know a good one? Mersenne Twister? 
> 
> It probably makes sense to use whatever we end up using for selftests[*] in order
> to minimize the code we have to maintain.
> 
> [*] https://lore.kernel.org/all/20221019221321.3033920-2-coltonlewis@google.com

Makes sense. I'll then just take this generator and adapt it to the kvm unit tests.
Or do you actually want to share the code, via a kernel header or something?

> 
> > > > +               // GIF is set by VMRUN
> > > > +               SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
> > > > +               // GIF is cleared by VMEXIT
> > > > +               asm volatile("cli;nop;stgi");
> > > 
> > > Why re-enable GIF on every exit?
> > 
> > And why not? KVM does this on each VMRUN.
> 
> Because doing work for no discernible reason is confusing.  E.g. if this were a
> "real" hypervisor, it should also context switch CR2.

I agree that my justification for this was not correct, but I might still want
to have the GIF toggling here, because I think it might add some value to the test.
I'll think about it.

> 
> KVM enables STGI because GIF=0 blocks _all_ interrupts, i.e. KVM needs to recognize
> NMI, SMI, #MC, etc... asap and even if KVM stays in its tight run loop.  For KUT,
> there should be never be an NMI, SMI, #MC, etc... and so no need to enable GIF.
> 
> I suppose you could make the argument that the test should set GIF when running on
> bare metal, but that's tenuous at best as SMI is the only event that isn't fatal to
> the test.
> 
> > > > +
> > > > +       printf("test started, waiting to end...\n");
> > > > +
> > > > +       while (cpus_active() > 1) {
> > > > +
> > > > +               unsigned long isr_count1, isr_count2;
> > > > +
> > > > +               isr_count1 = isr_counts[1];
> > > > +               delay(5ULL*1000*1000*1000);
> > > 
> > > Please add a macro or two for nanoseconds/milliseconds/seconds or whatever this
> > > expands to.
> > 
> > That is the problem - the delay is just in TSC freq units, and knowing TSC freq
> > for some reason on x86 is next to impossible on AMD
> 
> Ah, delay() takes the number cycles.  Ugh.
> 
> We should fix that, e.g. use the CPUID-provided frequency when possible (KVM should
> emulate this if it doesn't already), and then #define an arbitrary TSC frequency as
> a fall back so that we can write readable code, e.g. 2.4Ghz is probably close enough
> to work.

KVM doesn't emulate Intel's specific way of reporting the TSC frequency on AMD.
In some sense doing so would be wrong, as it is Intel specific.

I do think that it is a great idea to report the TSC frequency via some KVM-specific MSR.
That might though open a Pandora's box in regard to migration.

I don't like the 2.4GHz idea at all - it is once again corner cutting. It's true
that most code doesn't need an exact delay, not to mention that a delay is never going
to be exact, but once you expose a (nano)second based interface, test writers
will start to use it, and then wonder why someone hardcoded it to 2.4 GHz.

> 
> > > And why not have multi configs, e.g. to run with and without x2APIC?
> > 
> > Good idea as well, although I don't know if I want to slow down the kvm unit
> > tests run too much.
> 
> We should add a way to flag and omit all "slow" tests, e.g. vmx_vmcs_shadow_test
> takes an absurd amount of time and is uninteresting for the vast majority of changes.

This is a good idea as well.


Best regards,
	Maxim Levitsky

> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 14/16] svm: rewerite vm entry macros
  2022-10-24 19:56       ` Sean Christopherson
@ 2022-10-27 12:07         ` Maxim Levitsky
  2022-10-27 19:39           ` Sean Christopherson
  0 siblings, 1 reply; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-27 12:07 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Mon, 2022-10-24 at 19:56 +0000, Sean Christopherson wrote:
> On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> > On Thu, 2022-10-20 at 18:55 +0000, Sean Christopherson wrote:
> > > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > 
> > > Changelog please.  This patch in particular is extremely difficult to review
> > > without some explanation of what is being done, and why.
> > > 
> > > If it's not too much trouble, splitting this over multiple patches would be nice.
> > > 
> > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > > ---
> > > >  lib/x86/svm_lib.h | 58 +++++++++++++++++++++++++++++++++++++++
> > > >  x86/svm.c         | 51 ++++++++++------------------------
> > > >  x86/svm.h         | 70 ++---------------------------------------------
> > > >  x86/svm_tests.c   | 24 ++++++++++------
> > > >  4 files changed, 91 insertions(+), 112 deletions(-)
> > > > 
> > > > diff --git a/lib/x86/svm_lib.h b/lib/x86/svm_lib.h
> > > > index 27c3b137..59db26de 100644
> > > > --- a/lib/x86/svm_lib.h
> > > > +++ b/lib/x86/svm_lib.h
> > > > @@ -71,4 +71,62 @@ u8* svm_get_io_bitmap(void);
> > > >  #define MSR_BITMAP_SIZE 8192
> > > >  
> > > >  
> > > > +struct svm_extra_regs
> > > 
> > > Why not just svm_gprs?  This could even include RAX by grabbing it from the VMCB
> > > after VMRUN.
> > 
> > I prefer to have a single source of truth - if I grab it from vmcb, then
> > it will have to be synced to the vmcb on each vmrun, like KVM does,
> > but KVM also has a dirty registers bitmap and such.
> 
> KUT doesn't need a dirty registers bitmap.  That's purely a performance optimization
> for VMX so that KVM can avoid unnecessary VMWRITEs for RIP and RSP.  E.g. SVM
> ignores the dirty bitmap entirely:
I know that.

> 
>   static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>   {
> 	struct vcpu_svm *svm = to_svm(vcpu);
> 
> 	trace_kvm_entry(vcpu);
> 
> 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
> 	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
> 	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
> 
> 	...
> 
>   }
> 
> And even for VMX, I can't imagine a nVMX test will ever be so performance
> sensitive that an extra VMWRITE for RSP will be a problem.
> 
> > I prefer to keep it simple.

Me too. So the only other more or less clean way is to copy RAX and RSP from the vmcb to
svm_gprs on exit, and vice versa on VM entry. Is this what you mean?
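
If so, a minimal sketch of that approach (the struct and helper names here are
illustrative, not from the patch; only RAX/RSP go through the VMCB, the rest
stays in the asm swap):

	struct svm_gprs {
		u64 rax, rbx, rcx, rdx, rbp, rsi, rdi;
		u64 r8, r9, r10, r11, r12, r13, r14, r15;
		u64 rsp;
	};

	/* Before VMRUN: propagate the struct's RAX/RSP into the VMCB. */
	static inline void svm_gprs_to_vmcb(struct vmcb *vmcb, struct svm_gprs *g)
	{
		vmcb->save.rax = g->rax;
		vmcb->save.rsp = g->rsp;
	}

	/* After #VMEXIT: pull the architectural values back into the struct. */
	static inline void svm_gprs_from_vmcb(struct vmcb *vmcb, struct svm_gprs *g)
	{
		g->rax = vmcb->save.rax;
		g->rsp = vmcb->save.rsp;
	}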

> 
> The issue is that simplifying the assembly code increases the complexity for its users.
> E.g. users and readers need to understand what "extra regs" means, which means documenting
> what is included and what's not.  On the other hand, the assembly is already quite
> complex; adding a few lines to swap RAX and RSP doesn't really change the overall
> complexity of that low level code.
> 
> The other bit of complexity is that if a test wants to access all GPRs, it needs
> both this struct and the VMCB.  RSP is unlikely to be problematic, but I can see
> guest.RAX being something a test wants access to.
> 
> > Plus there is also RSP in vmcb, and RFLAGS, and even RIP to some extent is a GPR.
> 
> RIP is definitely not a GPR, it has no assigned index.  RFLAGS is also not a GPR.
> 
> > To call this struct svm_gprs, I would have to include them there as well.
> 
> RAX and RSP are the only GPRs that need to be moved to/from the VMCB.  
> 
> > And also there is segment registers, etc, etc.
> 
> Which aren't GPRs.

But a user might want to use them too.

> 
> > So instead of pretending that this struct contains all the GPRs of the guest
> > (or host while guest is running) I renamed it to state that it contains only
> > some GPRs that SVM doesn't context switch.
> 
> ...
> 
> > > > +               "xchg %%rdx, 0x10(%%" reg ")\n\t"       \
> > > > +               "xchg %%rbp, 0x18(%%" reg ")\n\t"       \
> > > > +               "xchg %%rsi, 0x20(%%" reg ")\n\t"       \
> > > > +               "xchg %%rdi, 0x28(%%" reg ")\n\t"       \
> > > > +               "xchg %%r8,  0x30(%%" reg ")\n\t"       \
> > > > +               "xchg %%r9,  0x38(%%" reg ")\n\t"       \
> > > > +               "xchg %%r10, 0x40(%%" reg ")\n\t"       \
> > > > +               "xchg %%r11, 0x48(%%" reg ")\n\t"       \
> > > > +               "xchg %%r12, 0x50(%%" reg ")\n\t"       \
> > > > +               "xchg %%r13, 0x58(%%" reg ")\n\t"       \
> > > > +               "xchg %%r14, 0x60(%%" reg ")\n\t"       \
> > > > +               "xchg %%r15, 0x68(%%" reg ")\n\t"       \
> > > > +               \
> > > 
> > > Extra line.
> > > 
> > > > +               "xchg %%rbx, 0x00(%%" reg ")\n\t"       \
> > > 
> > > Why is RBX last here, but first in the struct?  Ah, because the initial swap uses
> > > RBX as the scratch register.  Why use RAX for the post-VMRUN swap?  AFAICT, that's
> > > completely arbitrary.
> > 
> > Let me explain:
> > 
> > On entry to the guest the code has to save the host GPRs and then load the guest GPRs.
> > 
> > Host RAX and RBX are set by gcc as I requested with the "a" and "b"
> > modifiers, but even these should not be changed by the assembly code from the
> > values set in the input.
> > (At least I haven't found a way to mark a register as both input and clobber)
> 
> The way to achieve input+clobber is to use input+output, i.e. "+b" (regs), but I
> think that's a moot point...
I'll try that.
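
A contrived standalone example of the input+output form, just to show the
constraint (this is not the actual SWAP_GPRs/VMRUN macro):

	unsigned long val = 0;

	/*
	 * "+b" ties val to RBX as both an input and an output, so gcc assumes
	 * the asm may modify RBX -- effectively input + clobber, which a plain
	 * "b" input constraint cannot express.
	 */
	asm volatile("inc %%rbx" : "+b"(val) : : "cc");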

> 
> > Now RAX is the hardcoded input to VMRUN, thus I leave it alone, and use RBX
> > as regs pointer, which is restored to the guest value (and host value stored
> > in the regs) at the end of SWAP_GPRs.
> 
> ...because SWAP_GPRs isn't the end of the asm blob.  As long as RBX holds the
> same value (regs) at the end of the asm blob, no clobbering is necessary even if
> RBX is changed within the blob.
Exactly - I preserved it on the stack, but if I can tell gcc that my macro
clobbers it, then I won't need to.


> 
> > If I switch to a full-blown assembly function for this, then I could do it.
> > 
> > Note though that my LBR tests do still need this as a macro because they must
> > not do any extra jumps/calls as these clobber the LBR registers.
> 
> Shouldn't it be fairly easy to account for the CALL in the asm routine?  Taking
> on that sort of dependency is quite gross, but it'd likely be less maintenance
> in the long run than an inline asm blob.

That is not possible - SVM has just one LBR - so doing a call will erase it.

I'll think of something, I also do want to turn this into a function.

Best regards,
	Maxim Levitsky

> 



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow
  2022-10-27 10:16         ` Maxim Levitsky
@ 2022-10-27 15:50           ` Sean Christopherson
  2022-10-27 17:10             ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-27 15:50 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 27, 2022, Maxim Levitsky wrote:
> On Mon, 2022-10-24 at 22:49 +0000, Sean Christopherson wrote:
> > On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> > > I usually use just "\n", but the safest is "\n\t".
> > 
> > I'm pretty sure we can ignore GCC's warning here and maximize readability.  There
> > are already plenty of asm blobs that use a semicolon.
> 
> IMHO this is corner cutting and you yourself said that this is wrong.
> 
> The other instances which use semicolon should be fixed IMHO.

The kernel itself has multiple instances of "sti; ..." alone; I'm quite confident
that we can prioritize making the code easy to read without risking future breakage.

$ git grep -E "\"sti\;"
arch/x86/include/asm/irqflags.h:        asm volatile("sti; hlt": : :"memory");
arch/x86/include/asm/mwait.h:   asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/paravirt.h:        PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c:            asm volatile("sti; hlt; cli");
tools/testing/selftests/x86/iopl.c:             asm volatile("sti; pushf; pop %[flags]"

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer
  2022-10-27 10:19         ` Maxim Levitsky
@ 2022-10-27 15:54           ` Sean Christopherson
  2022-10-27 17:11             ` Maxim Levitsky
  0 siblings, 1 reply; 51+ messages in thread
From: Sean Christopherson @ 2022-10-27 15:54 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 27, 2022, Maxim Levitsky wrote:
> On Mon, 2022-10-24 at 16:10 +0000, Sean Christopherson wrote:
> > On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> > > On Thu, 2022-10-20 at 19:14 +0000, Sean Christopherson wrote:
> > > > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > > > +       // ensure that a pending timer is serviced
> > > > > +       irq_enable();
> > > > 
> > > > Jumping back to the "nop" patch, I'm reinforcing my vote to add sti_nop().  I
> > > > actually started typing a response to say this is broken before remembering that
> > > > a nop got added to irq_enable().
> > > 
> > > OK, although, for someone that doesn't know about the interrupt shadow (I
> > > guess most of the people that will look at this code), the above won't
> > > confuse them; in fact sti_nop() might confuse someone who doesn't know about
> > > why this nop is needed.
> > 
> > The difference is that sti_nop() might leave unfamiliar readers asking "why", but
> > it won't actively mislead them.  And the "why" can be easily answered by a comment
> > above sti_nop() to describe its purpose.  A "see also safe_halt()" with a comment
> > there would be extra helpful, as "safe halt" is the main reason the STI shadow is
> > even a thing.
> > 
> > On the other hand, shoving a NOP into irq_enable() is pretty much guaranteed to
> > cause problems for readers that do know about STI shadows since there's nothing
> > in the name "irq_enable" that suggests that the helper also intentionally eats the
> > interrupt shadow, and especially because the kernel's local_irq_enable() distills
> > down to a bare STI.
> 
> I still don't agree with you on this at all. I would like to hear what other
> KVM developers think about it.

Why not just kill off irq_enable() and irq_disable() and use sti() and cli()?
Then we don't have to come to any agreement on whether or not shoving a NOP into
irq_enable() is a good idea.

> safe_halt actually is an example of a function that abstracts away the nop -
> just what I want to do.

The difference is that "safe halt" is established terminology that specifically
means "STI immediately followed by HLT".

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow
  2022-10-27 15:50           ` Sean Christopherson
@ 2022-10-27 17:10             ` Maxim Levitsky
  0 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-27 17:10 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-27 at 15:50 +0000, Sean Christopherson wrote:
> On Thu, Oct 27, 2022, Maxim Levitsky wrote:
> > On Mon, 2022-10-24 at 22:49 +0000, Sean Christopherson wrote:
> > > On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> > > > I usually use just "\n", but the safest is "\n\t".
> > > 
> > > I'm pretty sure we can ignore GCC's warning here and maximize readability.  There
> > > are already plenty of asm blobs that use a semicolon.
> > 
> > IMHO this is corner cutting and you yourself said that this is wrong.
> > 
> > The other instances which use semicolon should be fixed IMHO.
> 
> The kernel itself has multiple instances of "sti; ..." alone; I'm quite confident
> that we can prioritize making the code easy to read without risking future breakage.
> 
> $ git grep -E "\"sti\;"
> arch/x86/include/asm/irqflags.h:        asm volatile("sti; hlt": : :"memory");
> arch/x86/include/asm/mwait.h:   asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
> arch/x86/include/asm/paravirt.h:        PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
> tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c:            asm volatile("sti; hlt; cli");
> tools/testing/selftests/x86/iopl.c:             asm volatile("sti; pushf; pop %[flags]"
> 

All right, let it be, but then let's also replace '\n\t' with just '\n', just so that we don't pretend that
we follow the gcc advice, to at least be consistent.

Best regards,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer
  2022-10-27 15:54           ` Sean Christopherson
@ 2022-10-27 17:11             ` Maxim Levitsky
  0 siblings, 0 replies; 51+ messages in thread
From: Maxim Levitsky @ 2022-10-27 17:11 UTC (permalink / raw)
  To: Sean Christopherson; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, 2022-10-27 at 15:54 +0000, Sean Christopherson wrote:
> On Thu, Oct 27, 2022, Maxim Levitsky wrote:
> > On Mon, 2022-10-24 at 16:10 +0000, Sean Christopherson wrote:
> > > On Mon, Oct 24, 2022, Maxim Levitsky wrote:
> > > > On Thu, 2022-10-20 at 19:14 +0000, Sean Christopherson wrote:
> > > > > On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> > > > > > +       // ensure that a pending timer is serviced
> > > > > > +       irq_enable();
> > > > > 
> > > > > Jumping back to the "nop" patch, I'm reinforcing my vote to add sti_nop().  I
> > > > > actually started typing a response to say this is broken before remembering that
> > > > > a nop got added to irq_enable().
> > > > 
> > > > OK, although, for someone that doesn't know about the interrupt shadow (I
> > > > guess most of the people that will look at this code), the above won't
> > > > confuse them; in fact sti_nop() might confuse someone who doesn't know about
> > > > why this nop is needed.
> > > 
> > > The difference is that sti_nop() might leave unfamiliar readers asking "why", but
> > > it won't actively mislead them.  And the "why" can be easily answered by a comment
> > > above sti_nop() to describe its purpose.  A "see also safe_halt()" with a comment
> > > there would be extra helpful, as "safe halt" is the main reason the STI shadow is
> > > even a thing.
> > > 
> > > On the other hand, shoving a NOP into irq_enable() is pretty much guaranteed to
> > > cause problems for readers that do know about STI shadows since there's nothing
> > > in the name "irq_enable" that suggests that the helper also intentionally eats the
> > > interrupt shadow, and especially because the kernel's local_irq_enable() distills
> > > down to a bare STI.
> > 
> > I still don't agree with you on this at all. I would like to hear what other
> > KVM developers think about it.
> 
> Why not just kill off irq_enable() and irq_disable() and use sti() and cli()?
> Then we don't have to come to any agreement on whether or not shoving a NOP into
> irq_enable() is a good idea.
> 
> > safe_halt actually is an example of a function that abstracts away the nop -
> > just what I want to do.
> 
> The difference is that "safe halt" is established terminology that specifically
> means "STI immediately followed by HLT".
> 

OK, let it be.

Best regards,
	Maxim Levitsky


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 16/16] add IPI loss stress test
  2022-10-27 11:00         ` Maxim Levitsky
@ 2022-10-27 18:41           ` Sean Christopherson
  0 siblings, 0 replies; 51+ messages in thread
From: Sean Christopherson @ 2022-10-27 18:41 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 27, 2022, Maxim Levitsky wrote:
> On Mon, 2022-10-24 at 17:19 +0000, Sean Christopherson wrote:
> > That doesn't (and shouldn't) wake the vCPU from the guest's perspective.  If/when
> > userspace calls KVM_RUN again, the vCPU's state should still be KVM_MP_STATE_HALTED
> > and thus KVM will invoke vcpu_block() until there is an actual wake event.
> 
> Well HLT is allowed to do spurious wakeups so KVM is allowed to not do it correctly,

I suspect the above "HLT is allowed to do spurious wakeups" is a typo, but in case
it's not, the SDM says:

  An enabled interrupt (including NMI and SMI), a debug exception, the BINIT# signal,
  the INIT# signal, or the RESET# signal will resume execution.

and the APM says:

  Execution resumes when an unmasked hardware interrupt (INTR), non-maskable
  interrupt (NMI), system management interrupt (SMI), RESET, or INIT occurs.

I.e. resuming from HLT without a valid wake event is a violation of the x86 architecture.

> > > In fact I'll just do it - just need to pick some open source PRNG code.
> > > Do you happen to know a good one? Mersenne Twister? 
> > 
> > It probably makes sense to use whatever we end up using for selftests[*] in order
> > to minimize the code we have to maintain.
> > 
> > [*] https://lore.kernel.org/all/20221019221321.3033920-2-coltonlewis@google.com
> 
> Makes sense. I'll then just take this generator and adapt it to the kvm unit tests.
> Or do you want to actually share the code? Via a kernel header or something?

Sadly, just copy+paste for now.  It'd be nice to share code, e.g. for the myriad
X86_FEATURE_* flags, but that's a separate problem.
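
As an illustration only (this is not the generator from the selftests series
linked above), a tiny self-contained SplitMix64-style step that would be easy
to copy into lib/ and seed per-vCPU:

	static inline u64 prng_next(u64 *state)
	{
		/* SplitMix64: one 64-bit state word, decent quality for tests. */
		u64 z = (*state += 0x9e3779b97f4a7c15ULL);

		z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
		z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
		return z ^ (z >> 31);
	}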

> > > That is the problem - the delay is just in TSC freq units, and, for some reason,
> > > knowing the TSC freq on x86 is next to impossible on AMD
> > 
> > Ah, delay() takes the number of cycles.  Ugh.
> > 
> > We should fix that, e.g. use the CPUID-provided frequency when possible (KVM should
> > emulate this if it doesn't already), and then #define an arbitrary TSC frequency as
> > a fallback so that we can write readable code, e.g. 2.4 GHz is probably close enough
> > to work.
> 
> KVM doesn't emulate Intel's specific way of reporting the TSC freq on AMD.
> In some sense doing so would be wrong anyway, as it is Intel specific.
> 
> I do think that it is a great idea to report the TSC freq via some KVM-specific MSR.
> That might, though, open a Pandora's box with regard to migration.

Heh, yeah, the Hyper-V TSC stuff is rather ugly.

> I don't like the 2.4 GHz idea at all - it is once again corner cutting. It's true
> that most code doesn't need an exact delay, not to mention that a delay is never going
> to be exact, but once you expose a (nano)second-based interface, test writers
> will start to use it, and then wonder why someone hardcoded it to 2.4 GHz.

True.  A really crazy/bad idea would be to get APERF/MPERF from /dev/cpu/0/msr
in the run script and somehow feed the host TSC into KUT :-)
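
For completeness, a rough sketch of the CPUID-based fallback idea, assuming
KUT's cpuid() helper; leaves 0x15/0x16 are the Intel-defined frequency leaves
discussed above (not reported for AMD guests today) and the fallback constant
is arbitrary:

	#define FALLBACK_TSC_HZ	(2400ULL * 1000 * 1000)

	static u64 guess_tsc_hz(void)
	{
		struct cpuid c;

		if (cpuid(0).a >= 0x15) {
			c = cpuid(0x15);	/* TSC/crystal ratio and crystal Hz */
			if (c.a && c.b && c.c)
				return (u64)c.c * c.b / c.a;
		}

		if (cpuid(0).a >= 0x16) {
			c = cpuid(0x16);	/* base frequency in MHz (approximation) */
			if (c.a)
				return c.a * 1000000ULL;
		}

		return FALLBACK_TSC_HZ;
	}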

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [kvm-unit-tests PATCH 14/16] svm: rewerite vm entry macros
  2022-10-27 12:07         ` Maxim Levitsky
@ 2022-10-27 19:39           ` Sean Christopherson
  0 siblings, 0 replies; 51+ messages in thread
From: Sean Christopherson @ 2022-10-27 19:39 UTC (permalink / raw)
  To: Maxim Levitsky; +Cc: kvm, Cathy Avery, Paolo Bonzini

On Thu, Oct 27, 2022, Maxim Levitsky wrote:
> On Mon, 2022-10-24 at 19:56 +0000, Sean Christopherson wrote:
> > > And also there is segment registers, etc, etc.
> > 
> > Which aren't GPRs.
> 
> But a user might want to use them too.

My point is that they don't need to be handled in the VM-Entry/VM-Exit path
as both VMX and SVM context switch all segment information through the VMCS/VMCB.
In other words, if we want to provide easy, generic access to segment information,
that can be done completely separately from this code and in a separate struct.

> > > Note though that my LBR tests do still need this as a macro because they must
> > > not do any extra jumps/calls as these clobber the LBR registers.
> > 
> > Shouldn't it be fairly easy to account for the CALL in the asm routine?  Taking
> > on that sort of dependency is quite gross, but it'd likely be less maintenance
> > in the long run than an inline asm blob.
> 
> That is not possible - SVM has just one LBR - so doing a call will erase it.

Ugh, that's a pain.  

> I'll think of something, I also do want to turn this into a function.

Actually, IIUC, there's no need to preserve the LBR across the call to a VMRUN
subroutine.  When checking that the host value is preserved, LBRs are disabled
before the call.  When checking that the guest value leaks back into the host,
the host value is irrelevant, the only thing that matters is that the LBR is
pre-filled with something other than the guest value, and that functionality is
provided by the call into the VMRUN subroutine.

LBR side topic #1, sequences like this should really be a single asm blob:

	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
	DO_BRANCH(...);
	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);

as there is nothing that prevents the compiler from inserting a branch between
DO_BRANCH() and the wrmsr().  It's extremely unlikely, but technically possible.
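
A rough sketch of folding that sequence into one blob (MSR_IA32_DEBUGCTLMSR and
DEBUGCTLMSR_LBR are the existing definitions; the branch and constraints here
are illustrative, not the actual test code):

	u32 eax = DEBUGCTLMSR_LBR, edx = 0;

	asm volatile("wrmsr\n\t"		/* DEBUGCTL.LBR = 1 */
		     "jmp 1f\n\t"		/* the branch that should land in the LBR */
		     "1: xor %%eax, %%eax\n\t"	/* EAX = 0; ECX/EDX still hold the MSR and high half */
		     "wrmsr"			/* DEBUGCTL = 0, no compiler-inserted branch in between */
		     : "+a"(eax)
		     : "c"(MSR_IA32_DEBUGCTLMSR), "d"(edx)
		     : "memory");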

LBR side topic #2, the tests are broken on our Milan systems.  I've poked around
a few times, but haven't dug in deep yet (and probably won't have cycles to do so
anytime soon).

PASS: Basic LBR test
PASS: Test that without LBRV enabled, guest LBR state does 'leak' to the host(1)
PASS: Test that without LBRV enabled, guest LBR state does 'leak' to the host(2)
PASS: Test that with LBRV enabled, guest LBR state doesn't leak (1)
Unhandled exception 6 #UD at ip 000000000040175c
error_code=0000      rflags=00010086      cs=00000008
rax=00000000004016e7 rcx=00000000000001dc rdx=80000000004016e7 rbx=0000000000414920
rbp=000000000042fa38 rsi=0000000000000000 rdi=0000000000414d98
 r8=00000000004176f9  r9=00000000000003f8 r10=000000000000000d r11=0000000000000000
r12=0000000000000000 r13=0000000000000000 r14=0000000000000000 r15=0000000000000000
cr0=0000000080010011 cr2=0000000000000000 cr3=00000000010bf000 cr4=0000000000040020
cr8=0000000000000000

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2022-10-27 19:39 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow Maxim Levitsky
2022-10-20 18:01   ` Sean Christopherson
2022-10-24 12:36     ` Maxim Levitsky
2022-10-24 22:49       ` Sean Christopherson
2022-10-27 10:16         ` Maxim Levitsky
2022-10-27 15:50           ` Sean Christopherson
2022-10-27 17:10             ` Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer Maxim Levitsky
2022-10-20 19:14   ` Sean Christopherson
2022-10-24 12:37     ` Maxim Levitsky
2022-10-24 16:10       ` Sean Christopherson
2022-10-27 10:19         ` Maxim Levitsky
2022-10-27 15:54           ` Sean Christopherson
2022-10-27 17:11             ` Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 03/16] svm: use irq_enable instead of sti/nop Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 04/16] svm: make svm_intr_intercept_mix_if/gif test a bit more robust Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 05/16] svm: use apic_start_timer/apic_stop_timer instead of open coding it Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 06/16] x86: Add test for #SMI during interrupt window Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 07/16] x86: Add a simple test for SYSENTER instruction Maxim Levitsky
2022-10-20 19:25   ` Sean Christopherson
2022-10-24 12:38     ` Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test Maxim Levitsky
2022-10-20 15:26   ` Maxim Levitsky
2022-10-20 19:06     ` Sean Christopherson
2022-10-24 12:39       ` Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 09/16] svm: move svm spec definitions to lib/x86/svm.h Maxim Levitsky
2022-10-20 19:08   ` Sean Christopherson
2022-10-20 15:23 ` [kvm-unit-tests PATCH 10/16] svm: move some svm support functions into lib/x86/svm_lib.h Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 11/16] svm: add svm_suported Maxim Levitsky
2022-10-20 18:21   ` Sean Christopherson
2022-10-24 12:40     ` Maxim Levitsky
2022-10-20 15:24 ` [kvm-unit-tests PATCH 12/16] svm: move setup_svm to svm_lib.c Maxim Levitsky
2022-10-20 15:24 ` [kvm-unit-tests PATCH 13/16] svm: move vmcb_ident " Maxim Levitsky
2022-10-20 18:37   ` Sean Christopherson
2022-10-24 12:46     ` Maxim Levitsky
2022-10-20 15:24 ` [kvm-unit-tests PATCH 14/16] svm: rewerite vm entry macros Maxim Levitsky
2022-10-20 18:55   ` Sean Christopherson
2022-10-24 12:45     ` Maxim Levitsky
2022-10-24 19:56       ` Sean Christopherson
2022-10-27 12:07         ` Maxim Levitsky
2022-10-27 19:39           ` Sean Christopherson
2022-10-20 15:24 ` [kvm-unit-tests PATCH 15/16] svm: introduce svm_vcpu Maxim Levitsky
2022-10-20 19:02   ` Sean Christopherson
2022-10-24 12:46     ` Maxim Levitsky
2022-10-20 15:24 ` [kvm-unit-tests PATCH 16/16] add IPI loss stress test Maxim Levitsky
2022-10-20 20:23   ` Sean Christopherson
2022-10-24 12:54     ` Maxim Levitsky
2022-10-24 17:19       ` Sean Christopherson
2022-10-27 11:00         ` Maxim Levitsky
2022-10-27 18:41           ` Sean Christopherson
