All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@intel.com, luto@kernel.org, peterz@infradead.org
Cc: sathyanarayanan.kuppuswamy@linux.intel.com, aarcange@redhat.com,
	ak@linux.intel.com, dan.j.williams@intel.com, david@redhat.com,
	hpa@zytor.com, jgross@suse.com, jmattson@google.com,
	joro@8bytes.org, jpoimboe@redhat.com, knsathya@kernel.org,
	pbonzini@redhat.com, sdeep@vmware.com, seanjc@google.com,
	tony.luck@intel.com, vkuznets@redhat.com, wanpengli@tencent.com,
	thomas.lendacky@amd.com, brijesh.singh@amd.com, x86@kernel.org,
	linux-kernel@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Dave Hansen <dave.hansen@linux.intel.com>
Subject: [PATCHv8 08/30] x86/tdx: Add HLT support for TDX guests
Date: Wed,  6 Apr 2022 02:29:17 +0300	[thread overview]
Message-ID: <20220405232939.73860-9-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20220405232939.73860-1-kirill.shutemov@linux.intel.com>

The HLT instruction is a privileged instruction; executing it stops
instruction execution and places the processor in a HALT state. It is
used in the kernel for cases like reboot, the idle loop and exception
fixup handlers. For the idle case, interrupts will be enabled (using
STI) before the HLT instruction (this is also called safe_halt()).

To support the HLT instruction in TDX guests, it needs to be emulated
using TDVMCALL (hypercall to VMM). More details about it can be found
in Intel Trust Domain Extensions (Intel TDX) Guest-Host-Communication
Interface (GHCI) specification, section TDVMCALL[Instruction.HLT].

In TDX guests, executing the HLT instruction will generate a #VE, which
is used to emulate the HLT instruction. But #VE-based emulation will not
work for the safe_halt() flavor, because it requires the STI instruction
to be executed just before the TDCALL. Since the idle loop is the only
user of the safe_halt() variant, handle it as a special case.

To avoid a *safe_halt() call in the idle function, define
tdx_safe_halt() and use it to override the "x86_idle" function pointer
for a valid TDX guest.

Alternative choices like PV ops have been considered for adding
safe_halt() support. But they were rejected because HLT paravirt calls
only exist under PARAVIRT_XXL, and enabling it in a TDX guest just for
the safe_halt() use case is not worth the cost.

Co-developed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
---
 arch/x86/coco/tdx/tdcall.S | 13 ++++++
 arch/x86/coco/tdx/tdx.c    | 93 +++++++++++++++++++++++++++++++++++++-
 arch/x86/include/asm/tdx.h |  4 ++
 arch/x86/kernel/process.c  |  4 ++
 4 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
index 662479ccf630..245888290bb6 100644
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -139,6 +139,19 @@ SYM_FUNC_START(__tdx_hypercall)
 
 	movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
 
+	/*
+	 * For the idle loop STI needs to be called directly before the TDCALL
+	 * that enters idle (EXIT_REASON_HLT case). STI instruction enables
+	 * interrupts only one instruction later. If there is a window between
+	 * STI and the instruction that emulates the HALT state, there is a
+	 * chance for interrupts to happen in this window, which can delay the
+	 * HLT operation indefinitely. Since this is not the desired
+	 * result, conditionally execute STI before TDCALL.
+	 */
+	testq $TDX_HCALL_ISSUE_STI, %rsi
+	jz .Lskip_sti
+	sti
+.Lskip_sti:
 	tdcall
 
 	/*
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 60a3f2ff5b95..ed7302581cc7 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -7,6 +7,7 @@
 #include <linux/cpufeature.h>
 #include <asm/coco.h>
 #include <asm/tdx.h>
+#include <asm/vmx.h>
 
 /* TDX module Call Leaf IDs */
 #define TDX_GET_INFO			1
@@ -36,6 +37,17 @@ void __tdx_hypercall_failed(void)
 	panic("TDVMCALL failed. TDX module bug?");
 }
 
+/*
+ * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
+ * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
+ * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
+ * guest sides of these calls.
+ */
+static u64 hcall_func(u64 exit_reason)
+{
+	return exit_reason;
+}
+
 /*
  * Used for TDX guests to make calls directly to the TD module.  This
  * should only be used for calls that have no legitimate reason to fail
@@ -74,6 +86,62 @@ static u64 get_cc_mask(void)
 	return BIT_ULL(gpa_width - 1);
 }
 
+static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
+{
+	struct tdx_hypercall_args args = {
+		.r10 = TDX_HYPERCALL_STANDARD,
+		.r11 = hcall_func(EXIT_REASON_HLT),
+		.r12 = irq_disabled,
+	};
+
+	/*
+	 * Emulate HLT operation via hypercall. More info about ABI
+	 * can be found in TDX Guest-Host-Communication Interface
+	 * (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
+	 *
+	 * The VMM uses the "IRQ disabled" param to understand IRQ
+	 * enabled status (RFLAGS.IF) of the TD guest and to determine
+	 * whether or not it should schedule the halted vCPU if an
+	 * IRQ becomes pending. E.g. if IRQs are disabled, the VMM
+	 * can keep the vCPU in virtual HLT, even if an IRQ is
+	 * pending, without hanging/breaking the guest.
+	 */
+	return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
+}
+
+static bool handle_halt(void)
+{
+	/*
+	 * Since non-safe halt is mainly used in CPU offlining
+	 * and the guest will always stay in the halt state, don't
+	 * issue the STI instruction (set do_sti to false).
+	 */
+	const bool irq_disabled = irqs_disabled();
+	const bool do_sti = false;
+
+	if (__halt(irq_disabled, do_sti))
+		return false;
+
+	return true;
+}
+
+void __cpuidle tdx_safe_halt(void)
+{
+	 /*
+	  * For the do_sti=true case, __tdx_hypercall() enables
+	  * interrupts using the STI instruction before the TDCALL. So
+	  * set irq_disabled to false.
+	  */
+	const bool irq_disabled = false;
+	const bool do_sti = true;
+
+	/*
+	 * Use WARN_ONCE() to report the failure.
+	 */
+	if (__halt(irq_disabled, do_sti))
+		WARN_ONCE(1, "HLT instruction emulation failed\n");
+}
+
 void tdx_get_ve_info(struct ve_info *ve)
 {
 	struct tdx_module_output out;
@@ -104,11 +172,32 @@ void tdx_get_ve_info(struct ve_info *ve)
 	ve->instr_info  = upper_32_bits(out.r10);
 }
 
+/* Handle the kernel #VE */
+static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
+{
+	switch (ve->exit_reason) {
+	case EXIT_REASON_HLT:
+		return handle_halt();
+	default:
+		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+		return false;
+	}
+}
+
 bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
 {
-	pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
+	bool ret;
+
+	if (user_mode(regs))
+		ret = false;
+	else
+		ret = virt_exception_kernel(regs, ve);
+
+	/* After successful #VE handling, move the IP */
+	if (ret)
+		regs->ip += ve->instr_len;
 
-	return false;
+	return ret;
 }
 
 void __init tdx_early_init(void)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index c4142e7b004c..cbd61e142f4e 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -14,6 +14,7 @@
 #define TDX_HYPERCALL_STANDARD  0
 
 #define TDX_HCALL_HAS_OUTPUT	BIT(0)
+#define TDX_HCALL_ISSUE_STI	BIT(1)
 
 /*
  * SW-defined error codes.
@@ -91,9 +92,12 @@ void tdx_get_ve_info(struct ve_info *ve);
 
 bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
 
+void tdx_safe_halt(void);
+
 #else
 
 static inline void tdx_early_init(void) { };
+static inline void tdx_safe_halt(void) { };
 
 #endif /* CONFIG_INTEL_TDX_GUEST */
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b370767f5b19..dbaf12c43fe1 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -46,6 +46,7 @@
 #include <asm/proto.h>
 #include <asm/frame.h>
 #include <asm/unwind.h>
+#include <asm/tdx.h>
 
 #include "process.h"
 
@@ -873,6 +874,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
 	} else if (prefer_mwait_c1_over_halt(c)) {
 		pr_info("using mwait in idle threads\n");
 		x86_idle = mwait_idle;
+	} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+		pr_info("using TDX aware idle routine\n");
+		x86_idle = tdx_safe_halt;
 	} else
 		x86_idle = default_idle;
 }
-- 
2.35.1


  parent reply	other threads:[~2022-04-06  3:16 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-05 23:29 [PATCHv8 00/30] TDX Guest: TDX core support Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 01/30] x86/tdx: Detect running as a TDX guest in early boot Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kuppuswamy Sathyanarayanan
2022-04-05 23:29 ` [PATCHv8 02/30] x86/tdx: Provide common base for SEAMCALL and TDCALL C wrappers Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 03/30] x86/tdx: Add __tdx_module_call() and __tdx_hypercall() helper functions Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kuppuswamy Sathyanarayanan
2022-05-20  8:38     ` [PATCH] x86/tdx: Fix tdx asm Peter Zijlstra
2022-05-20 11:00       ` [tip: x86/tdx] x86/tdx: Fix RETs in TDX asm tip-bot2 for Peter Zijlstra
2022-05-20 13:59       ` [PATCH] x86/tdx: Fix tdx asm Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 04/30] x86/tdx: Extend the confidential computing API to support TDX guests Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 05/30] x86/tdx: Exclude shared bit from __PHYSICAL_MASK Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 06/30] x86/traps: Refactor exc_general_protection() Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 07/30] x86/traps: Add #VE support for TDX guest Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` Kirill A. Shutemov [this message]
2022-04-09  1:27   ` [tip: x86/tdx] x86/tdx: Add HLT support for TDX guests tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 09/30] x86/tdx: Add MSR " Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 10/30] x86/tdx: Handle CPUID via #VE Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 11/30] x86/tdx: Handle in-kernel MMIO Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 12/30] x86/tdx: Detect TDX at early kernel decompression time Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kuppuswamy Sathyanarayanan
2022-04-05 23:29 ` [PATCHv8 13/30] x86: Adjust types used in port I/O helpers Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 14/30] x86: Consolidate " Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-10 10:58   ` [PATCHv8 14/30] " Borislav Petkov
2022-04-10 20:00     ` Kirill A. Shutemov
2022-04-10 20:37       ` Borislav Petkov
2022-04-11  7:49       ` [tip: x86/tdx] x86/kaslr: Fix build warning in KASLR code in boot stub tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 15/30] x86/boot: Port I/O: allow to hook up alternative helpers Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] x86/boot: Port I/O: Allow " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 16/30] x86/boot: Port I/O: add decompression-time support for TDX Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] x86/boot: Port I/O: Add " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 17/30] x86/tdx: Port I/O: add runtime hypercalls Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] x86/tdx: Port I/O: Add " tip-bot2 for Kuppuswamy Sathyanarayanan
2022-04-05 23:29 ` [PATCHv8 18/30] x86/tdx: Port I/O: add early boot support Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] x86/tdx: Port I/O: Add " tip-bot2 for Andi Kleen
2022-04-05 23:29 ` [PATCHv8 19/30] x86/tdx: Wire up KVM hypercalls Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kuppuswamy Sathyanarayanan
2022-04-05 23:29 ` [PATCHv8 20/30] x86/boot: Add a trampoline for booting APs via firmware handoff Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Sean Christopherson
2022-04-05 23:29 ` [PATCHv8 21/30] x86/acpi, x86/boot: Add multiprocessor wake-up support Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] x86/acpi/x86/boot: " tip-bot2 for Kuppuswamy Sathyanarayanan
2022-04-05 23:29 ` [PATCHv8 22/30] x86/boot: Set CR0.NE early and keep it set during the boot Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 23/30] x86/boot: Avoid #VE during boot for TDX platforms Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Sean Christopherson
2022-04-05 23:29 ` [PATCHv8 24/30] x86/topology: Disable CPU online/offline control for TDX guests Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kuppuswamy Sathyanarayanan
2022-04-05 23:29 ` [PATCHv8 25/30] x86/tdx: Make pages shared in ioremap() Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 26/30] x86/mm/cpa: Add support for TDX shared memory Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 27/30] x86/mm: Make DMA memory shared for TD guest Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 28/30] x86/tdx: ioapic: Add shared bit for IOAPIC base address Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] x86/tdx/ioapic: " tip-bot2 for Isaku Yamahata
2022-04-05 23:29 ` [PATCHv8 29/30] ACPICA: Avoid cache flush inside virtual machines Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kirill A. Shutemov
2022-04-05 23:29 ` [PATCHv8 30/30] Documentation/x86: Document TDX kernel architecture Kirill A. Shutemov
2022-04-09  1:27   ` [tip: x86/tdx] " tip-bot2 for Kuppuswamy Sathyanarayanan
2022-04-07 16:36 ` [PATCHv8 00/30] TDX Guest: TDX core support Dave Hansen
2022-04-07 16:50   ` Sean Christopherson
2022-04-07 17:42     ` Tom Lendacky
2022-04-07 17:47     ` Kirill A. Shutemov
2022-04-07 18:53       ` Sean Christopherson
2022-04-08 11:01         ` Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220405232939.73860-9-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=aarcange@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=brijesh.singh@amd.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=jpoimboe@redhat.com \
    --cc=knsathya@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=sdeep@vmware.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=tony.luck@intel.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.