kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] SVM: add test for nested guest RIP corruption
@ 2020-06-22 16:55 Maxim Levitsky
  2020-06-22 17:30 ` Paolo Bonzini
  0 siblings, 1 reply; 3+ messages in thread
From: Maxim Levitsky @ 2020-06-22 16:55 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Vitaly Kuznetsov, Maxim Levitsky

This adds a unit test for SVM nested register corruption that happened when
L0 was emulating an instruction, but then injecting an interrupt intercept to L1, which
lead it to give L1 vmexit handler stale (pre emulation) values of RAX,RIP and RSP.

This test detects the RIP corruption (situation when RIP is at the start of
the emulated instruction but the instruction, was already executed.

The upstream commit that fixed this bug is b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
  KVM: nSVM: Preserve registers modifications done before nested_svm_vmexit()

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 x86/svm_tests.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index c1abd55..30009c4 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -1789,6 +1789,106 @@ static bool virq_inject_check(struct svm_test *test)
     return get_test_stage(test) == 5;
 }
 
+/*
+ * Detect nested guest RIP corruption as explained in kernel commit
+ * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
+ *
+ * In the assembly loop below, execute 'ins' from a IO port,
+ * while not intercepting IO violations, so that this instruction is
+ * intercepted and emulated by the L0 qemu.
+ *
+ * At the same time we are getting interrupts from the local APIC timer,
+ * and we do intercept them in L1
+ *
+ * If interrupt happens on the insb instruction, L0 will VMexit, emulate
+ * the insb instruction and then it will try to inject the interrupt to L1
+ * by doing a nested VMexit (since L1 intercepts interrupts),
+ * and due to a bug it will use pre-emulation value of RIP,RAX and RSP.
+ *
+ * In our intercept handler we check that RIP is of the insb instruction,
+ * (corrupted) but its memory operand is already written meaning,
+ * that insb was already executed.
+ */
+
+static volatile int isr_cnt = 0;
+static volatile uint8_t io_port_var = 0xAA;
+extern const char insb_instruction_label[];
+
+static void reg_corruption_isr(isr_regs_t *regs)
+{
+    isr_cnt++;
+    apic_write(APIC_EOI, 0);
+}
+
+static void reg_corruption_prepare(struct svm_test *test)
+{
+    default_prepare(test);
+    set_test_stage(test, 0);
+
+    vmcb->control.int_ctl = V_INTR_MASKING_MASK;
+    vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
+
+    handle_irq(TIMER_VECTOR, reg_corruption_isr);
+
+    /* set local APIC to inject external interrupts */
+    apic_write(APIC_TMICT, 0);
+    apic_write(APIC_TDCR, 0);
+    apic_write(APIC_LVTT, TIMER_VECTOR | APIC_LVT_TIMER_PERIODIC);
+    apic_write(APIC_TMICT, 1000);
+}
+
+static void reg_corruption_test(struct svm_test *test)
+{
+    /* this is endless loop, which is interrupted by the timer interrupt */
+    asm volatile (
+            "again:\n\t"
+            "movw $0x4d0, %%dx\n\t" // IO port
+            "lea %[_io_port_var], %%rdi\n\t"
+            "movb $0xAA, %[_io_port_var]\n\t"
+            "insb_instruction_label:\n\t"
+            "insb\n\t"
+            "jmp again\n\t"
+
+            : [_io_port_var] "=m" (io_port_var)
+            : /* no inputs*/
+            : "rdx", "rdi"
+    );
+}
+
+static bool reg_corruption_finished(struct svm_test *test)
+{
+    if (isr_cnt == 10000) {
+        report(true,
+               "No RIP corruption detected after %d timer interrupts",
+               isr_cnt);
+        set_test_stage(test, 1);
+        return true;
+    }
+
+    if (vmcb->control.exit_code == SVM_EXIT_INTR) {
+
+        void* guest_rip = (void*)vmcb->save.rip;
+
+        irq_enable();
+        asm volatile ("nop");
+        irq_disable();
+
+        if (guest_rip == insb_instruction_label && io_port_var != 0xAA) {
+            report(false,
+                   "RIP corruption detected after %d timer interrupts",
+                   isr_cnt);
+            return true;
+        }
+
+    }
+    return false;
+}
+
+static bool reg_corruption_check(struct svm_test *test)
+{
+    return get_test_stage(test) == 1;
+}
+
 #define TEST(name) { #name, .v2 = name }
 
 /*
@@ -1950,6 +2050,9 @@ struct svm_test svm_tests[] = {
     { "virq_inject", default_supported, virq_inject_prepare,
       default_prepare_gif_clear, virq_inject_test,
       virq_inject_finished, virq_inject_check },
+    { "reg_corruption", default_supported, reg_corruption_prepare,
+      default_prepare_gif_clear, reg_corruption_test,
+      reg_corruption_finished, reg_corruption_check },
     TEST(svm_guest_state_test),
     { NULL, NULL, NULL, NULL, NULL, NULL, NULL }
 };
-- 
2.25.4


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] SVM: add test for nested guest RIP corruption
  2020-06-22 16:55 [PATCH] SVM: add test for nested guest RIP corruption Maxim Levitsky
@ 2020-06-22 17:30 ` Paolo Bonzini
  2020-06-23 10:48   ` Maxim Levitsky
  0 siblings, 1 reply; 3+ messages in thread
From: Paolo Bonzini @ 2020-06-22 17:30 UTC (permalink / raw)
  To: Maxim Levitsky, kvm; +Cc: Vitaly Kuznetsov

On 22/06/20 18:55, Maxim Levitsky wrote:
> +/*
> + * Detect nested guest RIP corruption as explained in kernel commit
> + * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
> + *
> + * In the assembly loop below, execute 'ins' from a IO port,
> + * while not intercepting IO violations, so that this instruction is
> + * intercepted and emulated by the L0 qemu.
> + *
> + * At the same time we are getting interrupts from the local APIC timer,
> + * and we do intercept them in L1
> + *
> + * If interrupt happens on the insb instruction, L0 will VMexit, emulate
> + * the insb instruction and then it will try to inject the interrupt to L1
> + * by doing a nested VMexit (since L1 intercepts interrupts),
> + * and due to a bug it will use pre-emulation value of RIP,RAX and RSP.
> + *
> + * In our intercept handler we check that RIP is of the insb instruction,
> + * (corrupted) but its memory operand is already written meaning,
> + * that insb was already executed.
> + */

Looks good, just some wording improvements

diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index 30009c4..827ff87 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -1793,21 +1793,20 @@ static bool virq_inject_check(struct svm_test *test)
  * Detect nested guest RIP corruption as explained in kernel commit
  * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
  *
- * In the assembly loop below, execute 'ins' from a IO port,
- * while not intercepting IO violations, so that this instruction is
- * intercepted and emulated by the L0 qemu.
+ * In the assembly loop below 'ins' is executed while IO instructions
+ * are not intercepted; the instruction is emulated by L0.
  *
  * At the same time we are getting interrupts from the local APIC timer,
  * and we do intercept them in L1
  *
- * If interrupt happens on the insb instruction, L0 will VMexit, emulate
- * the insb instruction and then it will try to inject the interrupt to L1
- * by doing a nested VMexit (since L1 intercepts interrupts),
- * and due to a bug it will use pre-emulation value of RIP,RAX and RSP.
+ * If the interrupt happens on the insb instruction, L0 will VMexit, emulate
+ * the insb instruction and then it will inject the interrupt to L1 through
+ * a nested VMexit.  Due to a bug, it would leave pre-emulation values of RIP,
+ * RAX and RSP in the VMCB.
  *
- * In our intercept handler we check that RIP is of the insb instruction,
- * (corrupted) but its memory operand is already written meaning,
- * that insb was already executed.
+ * In our intercept handler we detect the bug by checking that RIP is that of
+ * the insb instruction, but its memory operand has already been written.
+ * This means that insb was already executed.
  */
 
> +static void reg_corruption_test(struct svm_test *test)
> +{
> +    /* this is endless loop, which is interrupted by the timer interrupt */
> +    asm volatile (
> +            "again:\n\t"
> +            "movw $0x4d0, %%dx\n\t" // IO port
> +            "lea %[_io_port_var], %%rdi\n\t"
> +            "movb $0xAA, %[_io_port_var]\n\t"
> +            "insb_instruction_label:\n\t"
> +            "insb\n\t"
> +            "jmp again\n\t"

And here you could use "1:" and "jmp 1b" instead of a global label.

You said offlist that an "inb" instruction would not work, but I'm not sure
I understand why.  Wouldn't you see "0xAA" in vmcb->save.rax with the fixed
kernel, and something else with the broken one?

Paolo

> +
> +            : [_io_port_var] "=m" (io_port_var)
> +            : /* no inputs*/
> +            : "rdx", "rdi"
> +    );
> +}
> +
> +static bool reg_corruption_finished(struct svm_test *test)
> +{
> +    if (isr_cnt == 10000) {
> +        report(true,
> +               "No RIP corruption detected after %d timer interrupts",
> +               isr_cnt);
> +        set_test_stage(test, 1);
> +        return true;
> +    }
> +
> +    if (vmcb->control.exit_code == SVM_EXIT_INTR) {
> +
> +        void* guest_rip = (void*)vmcb->save.rip;
> +
> +        irq_enable();
> +        asm volatile ("nop");
> +        irq_disable();
> +
> +        if (guest_rip == insb_instruction_label && io_port_var != 0xAA) {
> +            report(false,
> +                   "RIP corruption detected after %d timer interrupts",
> +                   isr_cnt);
> +            return true;
> +        }
> +
> +    }
> +    return false;
> +}
> +
> +static bool reg_corruption_check(struct svm_test *test)
> +{
> +    return get_test_stage(test) == 1;
> +}
> +
>  #define TEST(name) { #name, .v2 = name }
>  
>  /*
> @@ -1950,6 +2050,9 @@ struct svm_test svm_tests[] = {
>      { "virq_inject", default_supported, virq_inject_prepare,
>        default_prepare_gif_clear, virq_inject_test,
>        virq_inject_finished, virq_inject_check },
> +    { "reg_corruption", default_supported, reg_corruption_prepare,
> +      default_prepare_gif_clear, reg_corruption_test,
> +      reg_corruption_finished, reg_corruption_check },
>      TEST(svm_guest_state_test),
>      { NULL, NULL, NULL, NULL, NULL, NULL, NULL }
>  };
> 


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] SVM: add test for nested guest RIP corruption
  2020-06-22 17:30 ` Paolo Bonzini
@ 2020-06-23 10:48   ` Maxim Levitsky
  0 siblings, 0 replies; 3+ messages in thread
From: Maxim Levitsky @ 2020-06-23 10:48 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Vitaly Kuznetsov

On Mon, 2020-06-22 at 19:30 +0200, Paolo Bonzini wrote:
> On 22/06/20 18:55, Maxim Levitsky wrote:
> > +/*
> > + * Detect nested guest RIP corruption as explained in kernel commit
> > + * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
> > + *
> > + * In the assembly loop below, execute 'ins' from a IO port,
> > + * while not intercepting IO violations, so that this instruction is
> > + * intercepted and emulated by the L0 qemu.
> > + *
> > + * At the same time we are getting interrupts from the local APIC timer,
> > + * and we do intercept them in L1
> > + *
> > + * If interrupt happens on the insb instruction, L0 will VMexit, emulate
> > + * the insb instruction and then it will try to inject the interrupt to L1
> > + * by doing a nested VMexit (since L1 intercepts interrupts),
> > + * and due to a bug it will use pre-emulation value of RIP,RAX and RSP.
> > + *
> > + * In our intercept handler we check that RIP is of the insb instruction,
> > + * (corrupted) but its memory operand is already written meaning,
> > + * that insb was already executed.
> > + */
> 
> Looks good, just some wording improvements
> 
> diff --git a/x86/svm_tests.c b/x86/svm_tests.c
> index 30009c4..827ff87 100644
> --- a/x86/svm_tests.c
> +++ b/x86/svm_tests.c
> @@ -1793,21 +1793,20 @@ static bool virq_inject_check(struct svm_test *test)
>   * Detect nested guest RIP corruption as explained in kernel commit
>   * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
>   *
> - * In the assembly loop below, execute 'ins' from a IO port,
> - * while not intercepting IO violations, so that this instruction is
> - * intercepted and emulated by the L0 qemu.
> + * In the assembly loop below 'ins' is executed while IO instructions
> + * are not intercepted; the instruction is emulated by L0.
>   *
>   * At the same time we are getting interrupts from the local APIC timer,
>   * and we do intercept them in L1
>   *
> - * If interrupt happens on the insb instruction, L0 will VMexit, emulate
> - * the insb instruction and then it will try to inject the interrupt to L1
> - * by doing a nested VMexit (since L1 intercepts interrupts),
> - * and due to a bug it will use pre-emulation value of RIP,RAX and RSP.
> + * If the interrupt happens on the insb instruction, L0 will VMexit, emulate
> + * the insb instruction and then it will inject the interrupt to L1 through
> + * a nested VMexit.  Due to a bug, it would leave pre-emulation values of RIP,
> + * RAX and RSP in the VMCB.
>   *
> - * In our intercept handler we check that RIP is of the insb instruction,
> - * (corrupted) but its memory operand is already written meaning,
> - * that insb was already executed.
> + * In our intercept handler we detect the bug by checking that RIP is that of
> + * the insb instruction, but its memory operand has already been written.
> + * This means that insb was already executed.
>   */
Fixed.

>  
> > +static void reg_corruption_test(struct svm_test *test)
> > +{
> > +    /* this is endless loop, which is interrupted by the timer interrupt */
> > +    asm volatile (
> > +            "again:\n\t"
> > +            "movw $0x4d0, %%dx\n\t" // IO port
> > +            "lea %[_io_port_var], %%rdi\n\t"
> > +            "movb $0xAA, %[_io_port_var]\n\t"
> > +            "insb_instruction_label:\n\t"
> > +            "insb\n\t"
> > +            "jmp again\n\t"
> 
> And here you could use "1:" and "jmp 1b" instead of a global label.
Good idea, I will do this in V2.

> 
> You said offlist that an "inb" instruction would not work, but I'm not sure
> I understand why.  Wouldn't you see "0xAA" in vmcb->save.rax with the fixed
> kernel, and something else with the broken one?

The problem is that when you use orginary inb, you can't distinguish between
case when inb just wasn't yet executed (and we got an interrupt on previous instruction)
or it was executed, but due to corruption all its side effects were reverted

(since inb only effect is a change of RIP and a change of EAX, and sadly EAX is hardcoded
for this instruction, since this is good old instruction from the good old 
 days when 'a' meant accumulator, and both are corrupted (reverted to old values) )

On the other hand, insb, effect is to write to memory the value, and change RIP,
and only RIP is reverted to old value, so I can detect this.

I'll send a V2 now.

Best regards,
	Maxim Levitsky


> 
> Paolo
> 
> > +
> > +            : [_io_port_var] "=m" (io_port_var)
> > +            : /* no inputs*/
> > +            : "rdx", "rdi"
> > +    );
> > +}
> > +
> > +static bool reg_corruption_finished(struct svm_test *test)
> > +{
> > +    if (isr_cnt == 10000) {
> > +        report(true,
> > +               "No RIP corruption detected after %d timer interrupts",
> > +               isr_cnt);
> > +        set_test_stage(test, 1);
> > +        return true;
> > +    }
> > +
> > +    if (vmcb->control.exit_code == SVM_EXIT_INTR) {
> > +
> > +        void* guest_rip = (void*)vmcb->save.rip;
> > +
> > +        irq_enable();
> > +        asm volatile ("nop");
> > +        irq_disable();
> > +
> > +        if (guest_rip == insb_instruction_label && io_port_var != 0xAA) {
> > +            report(false,
> > +                   "RIP corruption detected after %d timer interrupts",
> > +                   isr_cnt);
> > +            return true;
> > +        }
> > +
> > +    }
> > +    return false;
> > +}
> > +
> > +static bool reg_corruption_check(struct svm_test *test)
> > +{
> > +    return get_test_stage(test) == 1;
> > +}
> > +
> >  #define TEST(name) { #name, .v2 = name }
> >  
> >  /*
> > @@ -1950,6 +2050,9 @@ struct svm_test svm_tests[] = {
> >      { "virq_inject", default_supported, virq_inject_prepare,
> >        default_prepare_gif_clear, virq_inject_test,
> >        virq_inject_finished, virq_inject_check },
> > +    { "reg_corruption", default_supported, reg_corruption_prepare,
> > +      default_prepare_gif_clear, reg_corruption_test,
> > +      reg_corruption_finished, reg_corruption_check },
> >      TEST(svm_guest_state_test),
> >      { NULL, NULL, NULL, NULL, NULL, NULL, NULL }
> >  };
> > 



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-06-23 10:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-22 16:55 [PATCH] SVM: add test for nested guest RIP corruption Maxim Levitsky
2020-06-22 17:30 ` Paolo Bonzini
2020-06-23 10:48   ` Maxim Levitsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).