All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: Lai Jiangshan <jiangshanlai@gmail.com>, linux-kernel@vger.kernel.org
Cc: Lai Jiangshan <laijs@linux.alibaba.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Andi Kleen <ak@linux.intel.com>,
	Andy Lutomirski <luto@kernel.org>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Uros Bizjak <ubizjak@gmail.com>,
	Maxim Levitsky <mlevitsk@redhat.com>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Alexandre Chartre <alexandre.chartre@oracle.com>,
	Juergen Gross <jgross@suse.com>, Joerg Roedel <jroedel@suse.de>,
	Jian Cai <caij2003@gmail.com>
Subject: [PATCH] KVM/VMX: Invoke NMI non-IST entry instead of IST entry
Date: Tue, 04 May 2021 21:50:14 +0200	[thread overview]
Message-ID: <87r1imi8i1.ffs@nanos.tec.linutronix.de> (raw)
In-Reply-To: <87wnseis8v.ffs@nanos.tec.linutronix.de>

From: Lai Jiangshan <laijs@linux.alibaba.com>

In VMX, the host NMI handler needs to be invoked after NMI VM-Exit.
Before commit 1a5488ef0dcf6 ("KVM: VMX: Invoke NMI handler via indirect
call instead of INTn"), this was done by INTn ("int $2"). But INTn
microcode is relatively expensive, so the commit reworked NMI VM-Exit
handling to invoke the kernel handler by function call.

But this missed a detail. The NMI entry point for direct invocation is
fetched from the IDT and called on the kernel stack.  But on 64-bit the
NMI entry installed in the IDT expects to be invoked on the IST stack.  It
relies on the "NMI executing" variable, which is at a fixed position on
the IST stack, to work correctly.  When the entry point is unexpectedly
called on the kernel stack, the RSP-addressed "NMI executing" variable is
obviously also on the kernel stack, where it is "uninitialized", and this
can cause the NMI entry code to misbehave.

Provide a non-IST entry point for VMX which shares the C-function with
the regular NMI entry and invoke the new asm entry point instead.

On 32-bit this just maps to the regular NMI entry point as 32-bit has no
ISTs and is not affected.

[ tglx: Made it independent for backporting, massaged changelog ]

Fixes: 1a5488ef0dcf6 ("KVM: VMX: Invoke NMI handler via indirect call instead of INTn")
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
---

Note: This is the minimal fix which needs to be backported. The other
      changes are cleanup material on top for 5.14.

---
 arch/x86/include/asm/idtentry.h |   15 +++++++++++++++
 arch/x86/kernel/nmi.c           |   10 ++++++++++
 arch/x86/kvm/vmx/vmx.c          |   16 +++++++++-------
 3 files changed, 34 insertions(+), 7 deletions(-)

--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -588,6 +588,21 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC,	xenpv_
 #endif
 
 /* NMI */
+
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * Special NOIST entry point for VMX which invokes this on the kernel
+ * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
+ * 'executing' marker.
+ *
+ * On 32-bit this just uses the regular NMI entry point because 32-bit does
+ * not have ISTs.
+ */
+DECLARE_IDTENTRY(X86_TRAP_NMI,		exc_nmi_noist);
+#else
+#define asm_exc_nmi_noist		asm_exc_nmi
+#endif
+
 DECLARE_IDTENTRY_NMI(X86_TRAP_NMI,	exc_nmi);
 #ifdef CONFIG_XEN_PV
 DECLARE_IDTENTRY_RAW(X86_TRAP_NMI,	xenpv_exc_nmi);
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -524,6 +524,16 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
 		mds_user_clear_cpu_buffers();
 }
 
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_IDTENTRY_RAW(exc_nmi_noist)
+{
+	exc_nmi(regs);
+}
+#endif
+#if IS_MODULE(CONFIG_KVM_INTEL)
+EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
+#endif
+
 void stop_nmi(void)
 {
 	ignore_nmis++;
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -36,6 +36,7 @@
 #include <asm/debugreg.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/idtentry.h>
 #include <asm/io.h>
 #include <asm/irq_remapping.h>
 #include <asm/kexec.h>
@@ -6415,18 +6416,17 @@ static void vmx_apicv_post_state_restore
 
 void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
 
-static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
+static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
+					unsigned long entry)
 {
-	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
-	gate_desc *desc = (gate_desc *)host_idt_base + vector;
-
 	kvm_before_interrupt(vcpu);
-	vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
+	vmx_do_interrupt_nmi_irqoff(entry);
 	kvm_after_interrupt(vcpu);
 }
 
 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
 {
+	const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
 	u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
 
 	/* if exit due to PF check for async PF */
@@ -6437,18 +6437,20 @@ static void handle_exception_nmi_irqoff(
 		kvm_machine_check();
 	/* We need to handle NMIs before interrupts are enabled */
 	else if (is_nmi(intr_info))
-		handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
+		handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
 }
 
 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
 	u32 intr_info = vmx_get_intr_info(vcpu);
+	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
+	gate_desc *desc = (gate_desc *)host_idt_base + vector;
 
 	if (WARN_ONCE(!is_external_intr(intr_info),
 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;
 
-	handle_interrupt_nmi_irqoff(vcpu, intr_info);
+	handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
 }
 
 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)

  reply	other threads:[~2021-05-04 19:50 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-26 23:09 [PATCH 0/4] x86: Don't invoke asm_exc_nmi() on the kernel stack Lai Jiangshan
2021-04-26 23:09 ` [PATCH 1/4] x86/xen/entry: Rename xenpv_exc_nmi to noist_exc_nmi Lai Jiangshan
2021-04-28 21:27   ` Steven Rostedt
2021-04-30  7:15     ` Paolo Bonzini
2021-04-30 12:05       ` Steven Rostedt
2021-05-03 19:05   ` Thomas Gleixner
2021-05-03 19:41     ` Thomas Gleixner
2021-05-10  7:59   ` Juergen Gross
2021-04-26 23:09 ` [PATCH 2/4] x86/entry: Use asm_noist_exc_nmi() for NMI in early booting stage Lai Jiangshan
2021-04-28 21:30   ` Steven Rostedt
2021-05-03 20:13   ` Thomas Gleixner
2021-05-03 20:24     ` Thomas Gleixner
2021-05-03 21:45       ` Thomas Gleixner
2021-05-04 12:43         ` Thomas Gleixner
2021-05-04 19:50           ` Thomas Gleixner [this message]
2021-05-04 21:05             ` [PATCH] KVM/VMX: Invoke NMI non-IST entry instead of IST entry Maxim Levitsky
2021-05-04 21:12               ` Paolo Bonzini
2021-05-04 21:21                 ` Sean Christopherson
2021-05-04 21:23                   ` Andy Lutomirski
2021-05-04 21:25                     ` Paolo Bonzini
2021-05-04 21:51                       ` Sean Christopherson
2021-05-04 21:56                         ` Paolo Bonzini
2021-05-05  0:00                           ` Thomas Gleixner
2021-05-05 15:44                             ` Lai Jiangshan
2021-05-05  1:07                     ` Lai Jiangshan
2021-05-05  1:11                       ` Andy Lutomirski
2021-05-06 12:14             ` [tip: x86/urgent] " tip-bot2 for Lai Jiangshan
2021-04-26 23:09 ` [PATCH 3/4] " Lai Jiangshan
2021-04-30  2:46   ` Lai Jiangshan
2021-05-03 19:37   ` Thomas Gleixner
2021-05-03 20:02   ` Thomas Gleixner
2021-05-04  8:10     ` Paolo Bonzini
2021-04-26 23:09 ` [PATCH 4/4] KVM/VMX: Fold handle_interrupt_nmi_irqoff() into its solo caller Lai Jiangshan
2021-04-30  9:03   ` Thomas Gleixner
2021-04-30  9:06     ` Paolo Bonzini
2021-04-30 23:28       ` Thomas Gleixner
2021-04-30  7:14 ` [PATCH 0/4] x86: Don't invoke asm_exc_nmi() on the kernel stack Paolo Bonzini
2021-05-03 14:36   ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87r1imi8i1.ffs@nanos.tec.linutronix.de \
    --to=tglx@linutronix.de \
    --cc=ak@linux.intel.com \
    --cc=alexandre.chartre@oracle.com \
    --cc=bp@alien8.de \
    --cc=caij2003@gmail.com \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=jiangshanlai@gmail.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=jpoimboe@redhat.com \
    --cc=jroedel@suse.de \
    --cc=laijs@linux.alibaba.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=mlevitsk@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=seanjc@google.com \
    --cc=ubizjak@gmail.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.