All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@intel.com, luto@kernel.org, peterz@infradead.org
Cc: sathyanarayanan.kuppuswamy@linux.intel.com, aarcange@redhat.com,
	ak@linux.intel.com, dan.j.williams@intel.com, david@redhat.com,
	hpa@zytor.com, jgross@suse.com, jmattson@google.com,
	joro@8bytes.org, jpoimboe@redhat.com, knsathya@kernel.org,
	pbonzini@redhat.com, sdeep@vmware.com, seanjc@google.com,
	tony.luck@intel.com, vkuznets@redhat.com, wanpengli@tencent.com,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCHv3 24/32] x86/boot: Avoid #VE during boot for TDX platforms
Date: Fri, 18 Feb 2022 19:17:10 +0300	[thread overview]
Message-ID: <20220218161718.67148-25-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <20220218161718.67148-1-kirill.shutemov@linux.intel.com>

From: Sean Christopherson <seanjc@google.com>

There are a few MSRs and control register bits that the kernel
normally needs to modify during boot. But, TDX disallows
modification of these registers to help provide consistent security
guarantees. Fortunately, TDX ensures that these are all in the correct
state before the kernel loads, which means the kernel does not need to
modify them.

The conditions to avoid are:

 * Any writes to the EFER MSR
 * Clearing CR3.MCE

This theoretically makes the guest boot more fragile. If, for instance,
EFER was set up incorrectly and a WRMSR was performed, it will trigger
early exception panic or a triple fault, if it's before early
exceptions are set up. However, this is likely to trip up the guest
BIOS long before control reaches the kernel. In any case, these kinds
of problems are unlikely to occur in production environments, and
developers have good debug tools to fix them quickly.

Change the common boot code to work on TDX and non-TDX systems.
This should have no functional effect on non-TDX systems.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/Kconfig                     |  1 +
 arch/x86/boot/compressed/head_64.S   | 20 ++++++++++++++++++--
 arch/x86/boot/compressed/pgtable.h   |  2 +-
 arch/x86/kernel/head_64.S            | 28 ++++++++++++++++++++++++++--
 arch/x86/realmode/rm/trampoline_64.S | 13 ++++++++++++-
 5 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 85591ecc8473..3701e13e319c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -886,6 +886,7 @@ config INTEL_TDX_GUEST
 	depends on X86_X2APIC
 	select ARCH_HAS_CC_PLATFORM
 	select DYNAMIC_PHYSICAL_MASK
+	select X86_MCE
 	help
 	  Support running as a guest under Intel TDX.  Without this support,
 	  the guest kernel can not boot or run under TDX.
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d0c3d33f3542..6d903b2fc544 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -643,12 +643,28 @@ SYM_CODE_START(trampoline_32bit_src)
 	movl	$MSR_EFER, %ecx
 	rdmsr
 	btsl	$_EFER_LME, %eax
+	/* Avoid writing EFER if no change was made (for TDX guest) */
+	jc	1f
 	wrmsr
-	popl	%edx
+1:	popl	%edx
 	popl	%ecx
 
+#ifdef CONFIG_X86_MCE
+	/*
+	 * Preserve CR4.MCE if the kernel will enable #MC support.
+	 * Clearing MCE may fault in some environments (that also force #MC
+	 * support). Any machine check that occurs before #MC support is fully
+	 * configured will crash the system regardless of the CR4.MCE value set
+	 * here.
+	 */
+	movl	%cr4, %eax
+	andl	$X86_CR4_MCE, %eax
+#else
+	movl	$0, %eax
+#endif
+
 	/* Enable PAE and LA57 (if required) paging modes */
-	movl	$X86_CR4_PAE, %eax
+	orl	$X86_CR4_PAE, %eax
 	testl	%edx, %edx
 	jz	1f
 	orl	$X86_CR4_LA57, %eax
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index 6ff7e81b5628..cc9b2529a086 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -6,7 +6,7 @@
 #define TRAMPOLINE_32BIT_PGTABLE_OFFSET	0
 
 #define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE	0x70
+#define TRAMPOLINE_32BIT_CODE_SIZE	0x80
 
 #define TRAMPOLINE_32BIT_STACK_END	TRAMPOLINE_32BIT_SIZE
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 9c63fc5988cd..184b7468ea76 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -140,8 +140,22 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	addq	$(init_top_pgt - __START_KERNEL_map), %rax
 1:
 
+#ifdef CONFIG_X86_MCE
+	/*
+	 * Preserve CR4.MCE if the kernel will enable #MC support.
+	 * Clearing MCE may fault in some environments (that also force #MC
+	 * support). Any machine check that occurs before #MC support is fully
+	 * configured will crash the system regardless of the CR4.MCE value set
+	 * here.
+	 */
+	movq	%cr4, %rcx
+	andl	$X86_CR4_MCE, %ecx
+#else
+	movl	$0, %ecx
+#endif
+
 	/* Enable PAE mode, PGE and LA57 */
-	movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
+	orl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
 #ifdef CONFIG_X86_5LEVEL
 	testl	$1, __pgtable_l5_enabled(%rip)
 	jz	1f
@@ -246,13 +260,23 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	/* Setup EFER (Extended Feature Enable Register) */
 	movl	$MSR_EFER, %ecx
 	rdmsr
+	/*
+	 * Preserve current value of EFER for comparison and to skip
+	 * EFER writes if no change was made (for TDX guest)
+	 */
+	movl    %eax, %edx
 	btsl	$_EFER_SCE, %eax	/* Enable System Call */
 	btl	$20,%edi		/* No Execute supported? */
 	jnc     1f
 	btsl	$_EFER_NX, %eax
 	btsq	$_PAGE_BIT_NX,early_pmd_flags(%rip)
-1:	wrmsr				/* Make changes effective */
 
+	/* Avoid writing EFER if no change was made (for TDX guest) */
+1:	cmpl	%edx, %eax
+	je	1f
+	xor	%edx, %edx
+	wrmsr				/* Make changes effective */
+1:
 	/* Setup cr0 */
 	movl	$CR0_STATE, %eax
 	/* Make changes effective */
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index d380f2d1fd23..e38d61d6562e 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -143,11 +143,22 @@ SYM_CODE_START(startup_32)
 	movl	%eax, %cr3
 
 	# Set up EFER
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	/*
+	 * Skip writing to EFER if the register already has desired
+	 * value (to avoid #VE for the TDX guest).
+	 */
+	cmp	pa_tr_efer, %eax
+	jne	.Lwrite_efer
+	cmp	pa_tr_efer + 4, %edx
+	je	.Ldone_efer
+.Lwrite_efer:
 	movl	pa_tr_efer, %eax
 	movl	pa_tr_efer + 4, %edx
-	movl	$MSR_EFER, %ecx
 	wrmsr
 
+.Ldone_efer:
 	# Enable paging and in turn activate Long Mode.
 	movl	$CR0_STATE, %eax
 	movl	%eax, %cr0
-- 
2.34.1


  parent reply	other threads:[~2022-02-18 16:19 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-18 16:16 [PATCHv3 00/32] TDX Guest: TDX core support Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 01/32] x86/mm: Fix warning on build with X86_MEM_ENCRYPT=y Kirill A. Shutemov
2022-02-18 19:39   ` Dave Hansen
2022-02-18 16:16 ` [PATCHv3 02/32] x86/coco: Add API to handle encryption mask Kirill A. Shutemov
2022-02-18 20:36   ` Dave Hansen
2022-02-18 21:33     ` Kirill A. Shutemov
2022-02-18 21:58       ` Borislav Petkov
2022-02-19  0:13         ` [PATCHv3.1 2/32] x86/coco: Explicitly declare type of confidential computing platform Kirill A. Shutemov
2022-02-19  0:13           ` [PATCHv3.1 2.1/2] x86/coco: Add API to handle encryption mask Kirill A. Shutemov
2022-02-21 18:31             ` Tom Lendacky
2022-02-21 22:14               ` Kirill A. Shutemov
2022-02-21 21:05             ` Borislav Petkov
2022-02-21 22:10               ` Kirill A. Shutemov
2022-02-21 22:36                 ` Borislav Petkov
2022-02-21 23:25                   ` Kirill A. Shutemov
2022-02-22 13:31                     ` Borislav Petkov
2022-02-21 11:07           ` [PATCHv3.1 2/32] x86/coco: Explicitly declare type of confidential computing platform Borislav Petkov
2022-02-21 11:44             ` Kirill A. Shutemov
2022-02-21 12:05               ` Borislav Petkov
2022-02-21 13:52             ` Wei Liu
2022-02-21 20:20               ` Borislav Petkov
2022-02-21 19:28       ` [PATCHv3 02/32] x86/coco: Add API to handle encryption mask Dave Hansen
2022-02-21 19:49         ` Borislav Petkov
2022-02-21 22:21           ` Kirill A. Shutemov
2022-02-21 22:56             ` Borislav Petkov
2022-02-21 23:18               ` Kirill A. Shutemov
2022-02-22 13:28                 ` Borislav Petkov
2022-02-21 22:28         ` Kirill A. Shutemov
2022-02-22 11:03           ` Kirill A. Shutemov
2022-02-22 13:37             ` Borislav Petkov
2022-02-22 13:52               ` Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 03/32] x86/tdx: Detect running as a TDX guest in early boot Kirill A. Shutemov
2022-02-18 21:07   ` Dave Hansen
2022-02-20 15:01     ` Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 04/32] x86/tdx: Provide common base for SEAMCALL and TDCALL C wrappers Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 05/32] x86/tdx: Add __tdx_module_call() and __tdx_hypercall() helper functions Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 06/32] x86/tdx: Extend the confidential computing API to support TDX guests Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 07/32] x86/tdx: Exclude shared bit from __PHYSICAL_MASK Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 08/32] x86/traps: Add #VE support for TDX guest Kirill A. Shutemov
2022-02-22  7:19   ` Dingji Li
2022-02-22 11:11     ` Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 09/32] x86/tdx: Add HLT support for TDX guests Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 10/32] x86/tdx: Add MSR " Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 11/32] x86/tdx: Handle CPUID via #VE Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 12/32] x86/tdx: Handle in-kernel MMIO Kirill A. Shutemov
2022-02-18 16:16 ` [PATCHv3 13/32] x86/tdx: Detect TDX at early kernel decompression time Kirill A. Shutemov
2022-02-21 11:37   ` Cyrill Gorcunov
2022-02-21 13:53     ` Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 14/32] x86: Adjust types used in port I/O helpers Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 15/32] x86: Consolidate " Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 16/32] x86/boot: Allow to hook up alternative " Kirill A. Shutemov
2022-02-21 20:04   ` Tom Lendacky
2022-02-21 22:02     ` Kirill A. Shutemov
2022-02-22  1:25       ` Josh Poimboeuf
2022-02-18 16:17 ` [PATCHv3 17/32] x86/boot/compressed: Support TDX guest port I/O at decompression time Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 18/32] x86/tdx: Add port I/O emulation Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 19/32] x86/tdx: Handle early boot port I/O Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 20/32] x86/tdx: Wire up KVM hypercalls Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 21/32] x86/boot: Add a trampoline for booting APs via firmware handoff Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 22/32] x86/acpi, x86/boot: Add multiprocessor wake-up support Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 23/32] x86/boot: Set CR0.NE early and keep it set during the boot Kirill A. Shutemov
2022-02-18 16:17 ` Kirill A. Shutemov [this message]
2022-02-18 16:17 ` [PATCHv3 25/32] x86/topology: Disable CPU online/offline control for TDX guests Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 26/32] x86/tdx: Make pages shared in ioremap() Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 27/32] x86/mm/cpa: Generailize __set_memory_enc_pgtable() Kirill A. Shutemov
2022-02-21 15:46   ` Brijesh Singh
2022-02-18 16:17 ` [PATCHv3 28/32] x86/mm/cpa: Add support for TDX shared memory Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 29/32] x86/kvm: Use bounce buffers for TD guest Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 30/32] x86/tdx: ioapic: Add shared bit for IOAPIC base address Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 31/32] ACPICA: Avoid cache flush on TDX guest Kirill A. Shutemov
2022-02-18 16:17 ` [PATCHv3 32/32] Documentation/x86: Document TDX kernel architecture Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220218161718.67148-25-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=aarcange@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=david@redhat.com \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=jpoimboe@redhat.com \
    --cc=knsathya@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=sdeep@vmware.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.