kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling
@ 2020-09-15 19:15 Sean Christopherson
  2020-09-15 19:15 ` [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine Sean Christopherson
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Sean Christopherson @ 2020-09-15 19:15 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, Josh Poimboeuf, Uros Bizjak,
	Andi Kleen

Clean up KVM's handling of IRQ and NMI exits to move the invocation of the
IRQ handler to a standalone assembly routine, and to then consolidate the
NMI handling to use the same indirect call approach instead of using INTn.

The IRQ cleanup was suggested by Josh Poimboeuf in the context of a false
positive objtool warning[*].  I believe Josh intended to use UNWIND hints
instead of trickery to avoid objtool complaints.  I opted for trickery in
the form of a redundant, but explicit, restoration of RSP after the hidden
IRET.  AFAICT, there are no existing UNWIND hints that would let objtool
know that the stack is magically being restored, and adding a new hint to
save a single MOV <reg>, <reg> instruction seemed like overkill.

The NMI consolidation was loosely suggested by Andi Kleen.  Andi's actual
suggestion was to export and directly call the NMI handler, but that's a
more involved change (unless I'm misunderstanding the wants of the NMI
handler), whereas piggybacking the IRQ code is simple and seems like a
worthwhile intermediate step.

Sean Christopherson (2):
  KVM: VMX: Move IRQ invocation to assembly subroutine
  KVM: VMX: Invoke NMI handler via indirect call instead of INTn

 arch/x86/kvm/vmx/vmenter.S | 34 +++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 61 +++++++++++---------------------------
 2 files changed, 51 insertions(+), 44 deletions(-)

-- 
2.28.0


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine
  2020-09-15 19:15 [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling Sean Christopherson
@ 2020-09-15 19:15 ` Sean Christopherson
  2020-09-15 19:27   ` Josh Poimboeuf
  2020-09-15 19:38   ` Uros Bizjak
  2020-09-15 19:15 ` [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn Sean Christopherson
  2020-09-22 13:38 ` [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling Paolo Bonzini
  2 siblings, 2 replies; 16+ messages in thread
From: Sean Christopherson @ 2020-09-15 19:15 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, Josh Poimboeuf, Uros Bizjak,
	Andi Kleen

Move the asm blob that invokes the appropriate IRQ handler after VM-Exit
into a proper subroutine.  Unconditionally create a stack frame in the
subroutine so that, as objtool sees things, the function has standard
stack behavior.  The dynamic stack adjustment makes using unwind hints
problematic.

Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/vmenter.S | 34 ++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 33 +++------------------------------
 2 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 799db084a336..90ad7a6246e3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -4,6 +4,7 @@
 #include <asm/bitsperlong.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/nospec-branch.h>
+#include <asm/segment.h>
 
 #define WORD_SIZE (BITS_PER_LONG / 8)
 
@@ -294,3 +295,36 @@ SYM_FUNC_START(vmread_error_trampoline)
 
 	ret
 SYM_FUNC_END(vmread_error_trampoline)
+
+SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
+	/*
+	 * Unconditionally create a stack frame, getting the correct RSP on the
+	 * stack (for x86-64) would take two instructions anyways, and RBP can
+	 * be used to restore RSP to make objtool happy (see below).
+	 */
+	push %_ASM_BP
+	mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
+	 */
+	and  $-16, %rsp
+	push $__KERNEL_DS
+	push %rbp
+#endif
+	pushf
+	push $__KERNEL_CS
+	CALL_NOSPEC _ASM_ARG1
+
+	/*
+	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+	 * the correct value.  objtool doesn't know the callee will IRET and,
+	 * without the explicit restore, thinks the stack is getting walloped.
+	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+	 */
+	mov %_ASM_BP, %_ASM_SP
+	pop %_ASM_BP
+	ret
+SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 46ba2e03a892..391f079d9136 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6409,6 +6409,8 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
 }
 
+void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
+
 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 {
 	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
@@ -6430,10 +6432,6 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
 	unsigned int vector;
-	unsigned long entry;
-#ifdef CONFIG_X86_64
-	unsigned long tmp;
-#endif
 	gate_desc *desc;
 	u32 intr_info = vmx_get_intr_info(vcpu);
 
@@ -6443,36 +6441,11 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 
 	vector = intr_info & INTR_INFO_VECTOR_MASK;
 	desc = (gate_desc *)host_idt_base + vector;
-	entry = gate_offset(desc);
 
 	kvm_before_interrupt(vcpu);
-
-	asm volatile(
-#ifdef CONFIG_X86_64
-		"mov %%rsp, %[sp]\n\t"
-		"and $-16, %%rsp\n\t"
-		"push %[ss]\n\t"
-		"push %[sp]\n\t"
-#endif
-		"pushf\n\t"
-		"push %[cs]\n\t"
-		CALL_NOSPEC
-		:
-#ifdef CONFIG_X86_64
-		[sp]"=&r"(tmp),
-#endif
-		ASM_CALL_CONSTRAINT
-		:
-		[thunk_target]"r"(entry),
-#ifdef CONFIG_X86_64
-		[ss]"i"(__KERNEL_DS),
-#endif
-		[cs]"i"(__KERNEL_CS)
-	);
-
+	vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
 	kvm_after_interrupt(vcpu);
 }
-STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
 
 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 {
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2020-09-15 19:15 [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling Sean Christopherson
  2020-09-15 19:15 ` [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine Sean Christopherson
@ 2020-09-15 19:15 ` Sean Christopherson
  2021-04-26  9:33   ` Lai Jiangshan
  2020-09-22 13:38 ` [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling Paolo Bonzini
  2 siblings, 1 reply; 16+ messages in thread
From: Sean Christopherson @ 2020-09-15 19:15 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, Josh Poimboeuf, Uros Bizjak,
	Andi Kleen

Rework NMI VM-Exit handling to invoke the kernel handler by function
call instead of INTn.  INTn microcode is relatively expensive, and
aligning the IRQ and NMI handling will make it easier to update KVM
should some newfangled method for invoking the handlers come along.

Suggested-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 391f079d9136..b0eca151931d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6411,40 +6411,40 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 
 void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
 
+static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
+{
+	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
+	gate_desc *desc = (gate_desc *)host_idt_base + vector;
+
+	kvm_before_interrupt(vcpu);
+	vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
+	kvm_after_interrupt(vcpu);
+}
+
 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 {
 	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
 
 	/* if exit due to PF check for async PF */
-	if (is_page_fault(intr_info)) {
+	if (is_page_fault(intr_info))
 		vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
 	/* Handle machine checks before interrupts are enabled */
-	} else if (is_machine_check(intr_info)) {
+	else if (is_machine_check(intr_info))
 		kvm_machine_check();
 	/* We need to handle NMIs before interrupts are enabled */
-	} else if (is_nmi(intr_info)) {
-		kvm_before_interrupt(&vmx->vcpu);
-		asm("int $2");
-		kvm_after_interrupt(&vmx->vcpu);
-	}
+	else if (is_nmi(intr_info))
+		handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
 }
 
 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
-	unsigned int vector;
-	gate_desc *desc;
 	u32 intr_info = vmx_get_intr_info(vcpu);
 
 	if (WARN_ONCE(!is_external_intr(intr_info),
 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;
 
-	vector = intr_info & INTR_INFO_VECTOR_MASK;
-	desc = (gate_desc *)host_idt_base + vector;
-
-	kvm_before_interrupt(vcpu);
-	vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
-	kvm_after_interrupt(vcpu);
+	handle_interrupt_nmi_irqoff(vcpu, intr_info);
 }
 
 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine
  2020-09-15 19:15 ` [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine Sean Christopherson
@ 2020-09-15 19:27   ` Josh Poimboeuf
  2020-09-15 19:38   ` Uros Bizjak
  1 sibling, 0 replies; 16+ messages in thread
From: Josh Poimboeuf @ 2020-09-15 19:27 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, linux-kernel, Uros Bizjak, Andi Kleen

On Tue, Sep 15, 2020 at 12:15:04PM -0700, Sean Christopherson wrote:
> Move the asm blob that invokes the appropriate IRQ handler after VM-Exit
> into a proper subroutine.  Unconditionally create a stack frame in the
> subroutine so that, as objtool sees things, the function has standard
> stack behavior.  The dynamic stack adjustment makes using unwind hints
> problematic.
> 
> Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
> Cc: Uros Bizjak <ubizjak@gmail.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>

-- 
Josh


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine
  2020-09-15 19:15 ` [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine Sean Christopherson
  2020-09-15 19:27   ` Josh Poimboeuf
@ 2020-09-15 19:38   ` Uros Bizjak
  1 sibling, 0 replies; 16+ messages in thread
From: Uros Bizjak @ 2020-09-15 19:38 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, LKML, Josh Poimboeuf, Andi Kleen

On Tue, Sep 15, 2020 at 9:15 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> Move the asm blob that invokes the appropriate IRQ handler after VM-Exit
> into a proper subroutine.  Unconditionally create a stack frame in the
> subroutine so that, as objtool sees things, the function has standard
> stack behavior.  The dynamic stack adjustment makes using unwind hints
> problematic.
>
> Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
> Cc: Uros Bizjak <ubizjak@gmail.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

Acked-by: Uros Bizjak <ubizjak@gmail.com>

Uros.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling
  2020-09-15 19:15 [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling Sean Christopherson
  2020-09-15 19:15 ` [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine Sean Christopherson
  2020-09-15 19:15 ` [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn Sean Christopherson
@ 2020-09-22 13:38 ` Paolo Bonzini
  2 siblings, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2020-09-22 13:38 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	linux-kernel, Josh Poimboeuf, Uros Bizjak, Andi Kleen

On 15/09/20 21:15, Sean Christopherson wrote:
> Clean up KVM's handling of IRQ and NMI exits to move the invocation of the
> IRQ handler to a standalone assembly routine, and to then consolidate the
> NMI handling to use the same indirect call approach instead of using INTn.
> 
> The IRQ cleanup was suggested by Josh Poimboeuf in the context of a false
> positive objtool warning[*].  I believe Josh intended to use UNWIND hints
> instead of trickery to avoid objtool complaints.  I opted for trickery in
> the form of a redundant, but explicit, restoration of RSP after the hidden
> IRET.  AFAICT, there are no existing UNWIND hints that would let objtool
> know that the stack is magically being restored, and adding a new hint to
> save a single MOV <reg>, <reg> instruction seemed like overkill.
> 
> The NMI consolidation was loosely suggested by Andi Kleen.  Andi's actual
> suggestion was to export and directly call the NMI handler, but that's a
> more involved change (unless I'm misunderstanding the wants of the NMI
> handler), whereas piggybacking the IRQ code is simple and seems like a
> worthwhile intermediate step.
> 
> Sean Christopherson (2):
>   KVM: VMX: Move IRQ invocation to assembly subroutine
>   KVM: VMX: Invoke NMI handler via indirect call instead of INTn
> 
>  arch/x86/kvm/vmx/vmenter.S | 34 +++++++++++++++++++++
>  arch/x86/kvm/vmx/vmx.c     | 61 +++++++++++---------------------------
>  2 files changed, 51 insertions(+), 44 deletions(-)
> 

Queued, thanks.

Paolo


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2020-09-15 19:15 ` [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn Sean Christopherson
@ 2021-04-26  9:33   ` Lai Jiangshan
  2021-04-26 10:40     ` Paolo Bonzini
  0 siblings, 1 reply; 16+ messages in thread
From: Lai Jiangshan @ 2021-04-26  9:33 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, LKML, Josh Poimboeuf, Uros Bizjak, Andi Kleen,
	Andy Lutomirski, Steven Rostedt

Add CC: Andy Lutomirski
Add CC: Steven Rostedt

I think this patch made it wrong for NMI.

On Wed, Sep 16, 2020 at 3:27 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> Rework NMI VM-Exit handling to invoke the kernel handler by function
> call instead of INTn.  INTn microcode is relatively expensive, and
> aligning the IRQ and NMI handling will make it easier to update KVM
> should some newfangled method for invoking the handlers come along.
>
> Suggested-by: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 30 +++++++++++++++---------------
>  1 file changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 391f079d9136..b0eca151931d 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6411,40 +6411,40 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
>
>  void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
>
> +static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
> +{
> +       unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
> +       gate_desc *desc = (gate_desc *)host_idt_base + vector;
> +
> +       kvm_before_interrupt(vcpu);
> +       vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
> +       kvm_after_interrupt(vcpu);
> +}
> +
>  static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
>  {
>         u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
>
>         /* if exit due to PF check for async PF */
> -       if (is_page_fault(intr_info)) {
> +       if (is_page_fault(intr_info))
>                 vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
>         /* Handle machine checks before interrupts are enabled */
> -       } else if (is_machine_check(intr_info)) {
> +       else if (is_machine_check(intr_info))
>                 kvm_machine_check();
>         /* We need to handle NMIs before interrupts are enabled */
> -       } else if (is_nmi(intr_info)) {
> -               kvm_before_interrupt(&vmx->vcpu);
> -               asm("int $2");
> -               kvm_after_interrupt(&vmx->vcpu);
> -       }
> +       else if (is_nmi(intr_info))
> +               handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
>  }

When handle_interrupt_nmi_irqoff() is called, we may lose the
CPU-hidden-NMI-masked state due to IRET of #DB, #BP or other traps
between VMEXIT and handle_interrupt_nmi_irqoff().

But the NMI handler in the Linux kernel *expects* the CPU-hidden-NMI-masked
state is still set in the CPU for no nested NMI intruding into the beginning
of the handler.

The original code "int $2" can provide the needed CPU-hidden-NMI-masked
when entering #NMI, but I doubt it about this change.

I may have missed something, especially since I haven't read all of the earlier
discussions about the change.  More importantly, I haven't found the original
suggestion from Andi Kleen: (Quote from the cover letter):

The NMI consolidation was loosely suggested by Andi Kleen.  Andi's actual
suggestion was to export and directly call the NMI handler, but that's a
more involved change (unless I'm misunderstanding the wants of the NMI
handler), whereas piggybacking the IRQ code is simple and seems like a
worthwhile intermediate step.
(End of quote)

I think we need to change it back or change it to call the NMI handler
immediately after VMEXIT before leaving the "noinstr" section if needed.

Thanks,
Lai

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-26  9:33   ` Lai Jiangshan
@ 2021-04-26 10:40     ` Paolo Bonzini
  2021-04-26 11:44       ` Maxim Levitsky
                         ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Paolo Bonzini @ 2021-04-26 10:40 UTC (permalink / raw)
  To: Lai Jiangshan, Sean Christopherson
  Cc: Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	LKML, Josh Poimboeuf, Uros Bizjak, Andi Kleen, Andy Lutomirski,
	Steven Rostedt

On 26/04/21 11:33, Lai Jiangshan wrote:
> When handle_interrupt_nmi_irqoff() is called, we may lose the
> CPU-hidden-NMI-masked state due to IRET of #DB, #BP or other traps
> between VMEXIT and handle_interrupt_nmi_irqoff().
> 
> But the NMI handler in the Linux kernel *expects* the CPU-hidden-NMI-masked
> state is still set in the CPU for no nested NMI intruding into the beginning
> of the handler.
> 
> The original code "int $2" can provide the needed CPU-hidden-NMI-masked
> when entering #NMI, but I doubt it about this change.

How would "int $2" block NMIs?  The hidden effect of this change (and I 
should have reviewed better the effect on the NMI entry code) is that 
the call will not use the IST anymore.

However, I'm not sure which of the two situations is better: entering 
the NMI handler on the IST without setting the hidden NMI-blocked flag 
could be a recipe for bad things as well.

Paolo


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-26 10:40     ` Paolo Bonzini
@ 2021-04-26 11:44       ` Maxim Levitsky
  2021-04-26 13:59         ` Steven Rostedt
  2021-04-26 14:51       ` Andi Kleen
  2021-04-27  0:54       ` Lai Jiangshan
  2 siblings, 1 reply; 16+ messages in thread
From: Maxim Levitsky @ 2021-04-26 11:44 UTC (permalink / raw)
  To: Paolo Bonzini, Lai Jiangshan, Sean Christopherson
  Cc: Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	LKML, Josh Poimboeuf, Uros Bizjak, Andi Kleen, Andy Lutomirski,
	Steven Rostedt

On Mon, 2021-04-26 at 12:40 +0200, Paolo Bonzini wrote:
> On 26/04/21 11:33, Lai Jiangshan wrote:
> > When handle_interrupt_nmi_irqoff() is called, we may lose the
> > CPU-hidden-NMI-masked state due to IRET of #DB, #BP or other traps
> > between VMEXIT and handle_interrupt_nmi_irqoff().
> > 
> > But the NMI handler in the Linux kernel *expects* the CPU-hidden-NMI-masked
> > state is still set in the CPU for no nested NMI intruding into the beginning
> > of the handler.
> > 
> > The original code "int $2" can provide the needed CPU-hidden-NMI-masked
> > when entering #NMI, but I doubt it about this change.
> 
> How would "int $2" block NMIs?  The hidden effect of this change (and I 
> should have reviewed better the effect on the NMI entry code) is that 
> the call will not use the IST anymore.
> 
> However, I'm not sure which of the two situations is better: entering 
> the NMI handler on the IST without setting the hidden NMI-blocked flag 
> could be a recipe for bad things as well.

If I understand this correctly, we can't really set the NMI blocked flag
on Intel, but only keep it from being cleared by an iret after it
was set by the intercepted NMI.

Thus the goal of this patchset was to make sure that we don't
call any interrupt handlers that can do iret before we call the NMI handler.

Indeed I don't think that doing int $2 helps, unless I miss something.
We just need to make sure that we call the NMI handler as soon as possible.


If only Intel had the GI flag....


My 0.2 cents.

Best regards,
	Maxim Levitsky
> 
> Paolo
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-26 11:44       ` Maxim Levitsky
@ 2021-04-26 13:59         ` Steven Rostedt
  0 siblings, 0 replies; 16+ messages in thread
From: Steven Rostedt @ 2021-04-26 13:59 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Paolo Bonzini, Lai Jiangshan, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	LKML, Josh Poimboeuf, Uros Bizjak, Andi Kleen, Andy Lutomirski

On Mon, 26 Apr 2021 14:44:49 +0300
Maxim Levitsky <mlevitsk@redhat.com> wrote:

> On Mon, 2021-04-26 at 12:40 +0200, Paolo Bonzini wrote:
> > On 26/04/21 11:33, Lai Jiangshan wrote:  
> > > When handle_interrupt_nmi_irqoff() is called, we may lose the
> > > CPU-hidden-NMI-masked state due to IRET of #DB, #BP or other traps
> > > between VMEXIT and handle_interrupt_nmi_irqoff().
> > > 
> > > But the NMI handler in the Linux kernel *expects* the CPU-hidden-NMI-masked
> > > state is still set in the CPU for no nested NMI intruding into the beginning
> > > of the handler.

This is incorrect. The Linux kernel has for some time handled the case of
nested NMIs. It had to, to implement the ftrace break point updates, as it
would trigger an int3 in an NMI which would "unmask" the NMIs. It has also
been a long time bug where a page fault could do the same (the reason you
could never do a dump all tasks from NMI without triple faulting!).

But that's been fixed a long time ago, and I even wrote an LWN article
about it ;-)

 https://lwn.net/Articles/484932/

The NMI handler can handle the case of nested NMIs, and implements a
software "latch" to remember that another NMI is to be executed, if there
is a nested one. And it does so after the first one has finished.

-- Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-26 10:40     ` Paolo Bonzini
  2021-04-26 11:44       ` Maxim Levitsky
@ 2021-04-26 14:51       ` Andi Kleen
  2021-04-26 15:09         ` Andy Lutomirski
  2021-04-27  0:54       ` Lai Jiangshan
  2 siblings, 1 reply; 16+ messages in thread
From: Andi Kleen @ 2021-04-26 14:51 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Lai Jiangshan, Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, kvm, LKML, Josh Poimboeuf,
	Uros Bizjak, Andy Lutomirski, Steven Rostedt

> > The original code "int $2" can provide the needed CPU-hidden-NMI-masked
> > when entering #NMI, but I doubt it about this change.
> 
> How would "int $2" block NMIs?  The hidden effect of this change (and I
> should have reviewed better the effect on the NMI entry code) is that the
> call will not use the IST anymore.

My understanding is that int $2 does not block NMIs.

So reentries might have been possible.

-Andi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-26 14:51       ` Andi Kleen
@ 2021-04-26 15:09         ` Andy Lutomirski
  0 siblings, 0 replies; 16+ messages in thread
From: Andy Lutomirski @ 2021-04-26 15:09 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Paolo Bonzini, Lai Jiangshan, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	LKML, Josh Poimboeuf, Uros Bizjak, Andy Lutomirski,
	Steven Rostedt


> On Apr 26, 2021, at 7:51 AM, Andi Kleen <ak@linux.intel.com> wrote:
> 
> 
>> 
>>> The original code "int $2" can provide the needed CPU-hidden-NMI-masked
>>> when entering #NMI, but I doubt it about this change.
>> 
>> How would "int $2" block NMIs?  The hidden effect of this change (and I
>> should have reviewed better the effect on the NMI entry code) is that the
>> call will not use the IST anymore.
> 
> My understanding is that int $2 does not block NMIs.
> 
> So reentries might have been possible.
> 

The C NMI code has its own reentrancy protection and has for years.  It should work fine for this use case.

> -Andi

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-26 10:40     ` Paolo Bonzini
  2021-04-26 11:44       ` Maxim Levitsky
  2021-04-26 14:51       ` Andi Kleen
@ 2021-04-27  0:54       ` Lai Jiangshan
  2021-04-27  1:00         ` Steven Rostedt
  2021-04-27  7:05         ` Paolo Bonzini
  2 siblings, 2 replies; 16+ messages in thread
From: Lai Jiangshan @ 2021-04-27  0:54 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	LKML, Josh Poimboeuf, Uros Bizjak, Andi Kleen, Andy Lutomirski,
	Steven Rostedt, Sean Christopherson, Maxim Levitsky

(Correct Sean Christopherson's email address)

On Mon, Apr 26, 2021 at 6:40 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 26/04/21 11:33, Lai Jiangshan wrote:
> > When handle_interrupt_nmi_irqoff() is called, we may lose the
> > CPU-hidden-NMI-masked state due to IRET of #DB, #BP or other traps
> > between VMEXIT and handle_interrupt_nmi_irqoff().
> >
> > But the NMI handler in the Linux kernel *expects* the CPU-hidden-NMI-masked
> > state is still set in the CPU for no nested NMI intruding into the beginning
> > of the handler.
> >
> > The original code "int $2" can provide the needed CPU-hidden-NMI-masked
> > when entering #NMI, but I doubt it about this change.
>
> How would "int $2" block NMIs?

Sorry, I haven't checked it.

> The hidden effect of this change (and I
> should have reviewed better the effect on the NMI entry code) is that
> the call will not use the IST anymore.
>
> However, I'm not sure which of the two situations is better: entering
> the NMI handler on the IST without setting the hidden NMI-blocked flag
> could be a recipe for bad things as well.

The change makes the ASM NMI entry called on the kernel stack.  But the
ASM NMI entry expects it on the IST stack and it plays with "NMI executing"
variable on the IST stack.  In this change, the stranded ASM NMI entry
will use the wrong/garbage "NMI executing" variable on the kernel stack
and may do some very wrong thing.

On Mon, Apr 26, 2021 at 9:59 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> > > > But the NMI handler in the Linux kernel *expects* the CPU-hidden-NMI-masked
> > > > state is still set in the CPU for no nested NMI intruding into the beginning
> > > > of the handler.
>
>
> This is incorrect. The Linux kernel has for some time handled the case of
> nested NMIs. It had to, to implement the ftrace break point updates, as it
> would trigger an int3 in an NMI which would "unmask" the NMIs. It has also
> been a long time bug where a page fault could do the same (the reason you
> could never do a dump all tasks from NMI without triple faulting!).
>
> But that's been fixed a long time ago, and I even wrote an LWN article
> about it ;-)
>
>  https://lwn.net/Articles/484932/
>
> The NMI handler can handle the case of nested NMIs, and implements a
> software "latch" to remember that another NMI is to be executed, if there
> is a nested one. And it does so after the first one has finished.

Sorry, in my reply, "the NMI handler" meant to be the ASM entry installed
on the IDT table which really expects to be NMI-masked at the beginning.

The C NMI handler can handle the case of nested NMIs, which is useful
here.  I think we should change it to call the C NMI handler directly
here as Andy Lutomirski suggested:

On Mon, Apr 26, 2021 at 11:09 PM Andy Lutomirski <luto@amacapital.net> wrote:
> The C NMI code has its own reentrancy protection and has for years.
> It should work fine for this use case.

I think this is the right way.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-27  0:54       ` Lai Jiangshan
@ 2021-04-27  1:00         ` Steven Rostedt
  2021-04-27  7:05         ` Paolo Bonzini
  1 sibling, 0 replies; 16+ messages in thread
From: Steven Rostedt @ 2021-04-27  1:00 UTC (permalink / raw)
  To: Lai Jiangshan
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, kvm, LKML, Josh Poimboeuf, Uros Bizjak, Andi Kleen,
	Andy Lutomirski, Sean Christopherson, Maxim Levitsky

On Tue, 27 Apr 2021 08:54:37 +0800
Lai Jiangshan <jiangshanlai+lkml@gmail.com> wrote:

> > However, I'm not sure which of the two situations is better: entering
> > the NMI handler on the IST without setting the hidden NMI-blocked flag
> > could be a recipe for bad things as well.  
> 
> The change makes the ASM NMI entry called on the kernel stack.  But the
> ASM NMI entry expects it on the IST stack and it plays with "NMI executing"
> variable on the IST stack.  In this change, the stranded ASM NMI entry
> will use the wrong/garbage "NMI executing" variable on the kernel stack
> and may do some very wrong thing.

I missed this detail.

> 
> Sorry, in my reply, "the NMI handler" meant to be the ASM entry installed
> on the IDT table which really expects to be NMI-masked at the beginning.
> 
> The C NMI handler can handle the case of nested NMIs, which is useful
> here.  I think we should change it to call the C NMI handler directly
> here as Andy Lutomirski suggested:

Yes, because that's the way x86_32 works.

> 
> On Mon, Apr 26, 2021 at 11:09 PM Andy Lutomirski <luto@amacapital.net> wrote:
> > The C NMI code has its own reentrancy protection and has for years.
> > It should work fine for this use case.  
> 
> I think this is the right way.

Agreed.

-- Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-27  0:54       ` Lai Jiangshan
  2021-04-27  1:00         ` Steven Rostedt
@ 2021-04-27  7:05         ` Paolo Bonzini
  2021-04-30  2:56           ` Lai Jiangshan
  1 sibling, 1 reply; 16+ messages in thread
From: Paolo Bonzini @ 2021-04-27  7:05 UTC (permalink / raw)
  To: Lai Jiangshan
  Cc: Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	LKML, Josh Poimboeuf, Uros Bizjak, Andi Kleen, Andy Lutomirski,
	Steven Rostedt, Sean Christopherson, Maxim Levitsky

On 27/04/21 02:54, Lai Jiangshan wrote:
> The C NMI handler can handle the case of nested NMIs, which is useful
> here.  I think we should change it to call the C NMI handler directly
> here as Andy Lutomirski suggested:

Great, can you send a patch?

Paolo

> On Mon, Apr 26, 2021 at 11:09 PM Andy Lutomirski <luto@amacapital.net> wrote:
>> The C NMI code has its own reentrancy protection and has for years.
>> It should work fine for this use case.
> 
> I think this is the right way.
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn
  2021-04-27  7:05         ` Paolo Bonzini
@ 2021-04-30  2:56           ` Lai Jiangshan
  0 siblings, 0 replies; 16+ messages in thread
From: Lai Jiangshan @ 2021-04-30  2:56 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel, kvm,
	LKML, Josh Poimboeuf, Uros Bizjak, Andi Kleen, Andy Lutomirski,
	Steven Rostedt, Sean Christopherson, Maxim Levitsky,
	Lai Jiangshan

On Tue, Apr 27, 2021 at 3:05 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 27/04/21 02:54, Lai Jiangshan wrote:
> > The C NMI handler can handle the case of nested NMIs, which is useful
> > here.  I think we should change it to call the C NMI handler directly
> > here as Andy Lutomirski suggested:
>
> Great, can you send a patch?
>

Hello, I sent it several days ago.  Could you please review it?  I will then
update the patchset with the feedback applied.  And thanks to Steven for the
reviews.

https://lore.kernel.org/lkml/20210426230949.3561-4-jiangshanlai@gmail.com/

thanks
Lai

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-04-30  2:56 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-15 19:15 [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling Sean Christopherson
2020-09-15 19:15 ` [PATCH v2 1/2] KVM: VMX: Move IRQ invocation to assembly subroutine Sean Christopherson
2020-09-15 19:27   ` Josh Poimboeuf
2020-09-15 19:38   ` Uros Bizjak
2020-09-15 19:15 ` [PATCH v2 2/2] KVM: VMX: Invoke NMI handler via indirect call instead of INTn Sean Christopherson
2021-04-26  9:33   ` Lai Jiangshan
2021-04-26 10:40     ` Paolo Bonzini
2021-04-26 11:44       ` Maxim Levitsky
2021-04-26 13:59         ` Steven Rostedt
2021-04-26 14:51       ` Andi Kleen
2021-04-26 15:09         ` Andy Lutomirski
2021-04-27  0:54       ` Lai Jiangshan
2021-04-27  1:00         ` Steven Rostedt
2021-04-27  7:05         ` Paolo Bonzini
2021-04-30  2:56           ` Lai Jiangshan
2020-09-22 13:38 ` [PATCH v2 0/2] KVM: VMX: Clean up IRQ/NMI handling Paolo Bonzini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).