linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] x86/tdx: Changes for TDX guest initialization
@ 2022-12-09 13:25 Kirill A. Shutemov
  2022-12-09 13:25 ` [PATCH 1/4] x86/tdx: Expand __tdx_hypercall() to handle more arguments Kirill A. Shutemov
                   ` (3 more replies)
  0 siblings, 4 replies; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-09 13:25 UTC (permalink / raw)
  To: Dave Hansen, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel, Kirill A. Shutemov

Several changes to TDX initialization:

- Make early panic message visible to user;

- Relax SEPT_VE_DISABLE for debug TD. It helps to investigate bugs
  resulting in access of unaccepted memory.

- Make sure NOTIFY_ENABLES is off to eliminate a possible source of random
  #VEs.

The patchset makes use of the ReportFatalError TDVMCALL. The definition of
the TDVMCALL has changed in a recent GHCI update[1]. It now requires more
arguments than __tdx_hypercall() currently handles. The patch that expands
__tdx_hypercall() is the same as the patch included in TDX guest
enabling for Hyper-V.

[1] https://cdrdv2.intel.com/v1/dl/getContent/726790

Kirill A. Shutemov (4):
  x86/tdx: Expand __tdx_hypercall() to handle more arguments
  x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  x86/tdx: Relax SEPT_VE_DISABLE check for debug TD
  x86/tdx: Disable NOTIFY_ENABLES

 arch/x86/coco/tdx/tdcall.S        | 82 ++++++++++++++++++++++---------
 arch/x86/coco/tdx/tdx.c           | 57 ++++++++++++++++++++-
 arch/x86/include/asm/shared/tdx.h |  6 +++
 arch/x86/kernel/asm-offsets.c     |  6 +++
 4 files changed, 125 insertions(+), 26 deletions(-)

-- 
2.38.0


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/4] x86/tdx: Expand __tdx_hypercall() to handle more arguments
  2022-12-09 13:25 [PATCH 0/4] x86/tdx: Changes for TDX guest initialization Kirill A. Shutemov
@ 2022-12-09 13:25 ` Kirill A. Shutemov
  2022-12-13 22:44   ` Dave Hansen
  2022-12-09 13:25 ` [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE Kirill A. Shutemov
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-09 13:25 UTC (permalink / raw)
  To: Dave Hansen, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel, Kirill A. Shutemov

So far __tdx_hypercall() only handles six arguments for VMCALL.
Expanding it to six more registers would allow it to cover more use-cases.

Using RDI and RSI as VMCALL arguments requires more register shuffling.
RAX is used to hold tdx_hypercall_args pointer and RBP stores flags.

While there, fix typo in the comment on panic branch.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/coco/tdx/tdcall.S        | 82 ++++++++++++++++++++++---------
 arch/x86/include/asm/shared/tdx.h |  6 +++
 arch/x86/kernel/asm-offsets.c     |  6 +++
 3 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
index f9eb1134f22d..64e57739dc9d 100644
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -13,6 +13,12 @@
 /*
  * Bitmasks of exposed registers (with VMM).
  */
+#define TDX_RDX		BIT(2)
+#define TDX_RBX		BIT(3)
+#define TDX_RSI		BIT(6)
+#define TDX_RDI		BIT(7)
+#define TDX_R8		BIT(8)
+#define TDX_R9		BIT(9)
 #define TDX_R10		BIT(10)
 #define TDX_R11		BIT(11)
 #define TDX_R12		BIT(12)
@@ -27,9 +33,9 @@
  * details can be found in TDX GHCI specification, section
  * titled "TDCALL [TDG.VP.VMCALL] leaf".
  */
-#define TDVMCALL_EXPOSE_REGS_MASK	( TDX_R10 | TDX_R11 | \
-					  TDX_R12 | TDX_R13 | \
-					  TDX_R14 | TDX_R15 )
+#define TDVMCALL_EXPOSE_REGS_MASK	\
+	( TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8  | TDX_R9  | \
+	  TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15 )
 
 /*
  * __tdx_module_call()  - Used by TDX guests to request services from
@@ -124,19 +130,32 @@ SYM_FUNC_START(__tdx_hypercall)
 	push %r14
 	push %r13
 	push %r12
+	push %rbx
+	push %rbp
+
+	movq %rdi, %rax
+	movq %rsi, %rbp
+
+	/* Copy hypercall registers from arg struct: */
+	movq TDX_HYPERCALL_r8(%rax),  %r8
+	movq TDX_HYPERCALL_r9(%rax),  %r9
+	movq TDX_HYPERCALL_r10(%rax), %r10
+	movq TDX_HYPERCALL_r11(%rax), %r11
+	movq TDX_HYPERCALL_r12(%rax), %r12
+	movq TDX_HYPERCALL_r13(%rax), %r13
+	movq TDX_HYPERCALL_r14(%rax), %r14
+	movq TDX_HYPERCALL_r15(%rax), %r15
+	movq TDX_HYPERCALL_rdi(%rax), %rdi
+	movq TDX_HYPERCALL_rsi(%rax), %rsi
+	movq TDX_HYPERCALL_rbx(%rax), %rbx
+	movq TDX_HYPERCALL_rdx(%rax), %rdx
+
+	push %rax
 
 	/* Mangle function call ABI into TDCALL ABI: */
 	/* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */
 	xor %eax, %eax
 
-	/* Copy hypercall registers from arg struct: */
-	movq TDX_HYPERCALL_r10(%rdi), %r10
-	movq TDX_HYPERCALL_r11(%rdi), %r11
-	movq TDX_HYPERCALL_r12(%rdi), %r12
-	movq TDX_HYPERCALL_r13(%rdi), %r13
-	movq TDX_HYPERCALL_r14(%rdi), %r14
-	movq TDX_HYPERCALL_r15(%rdi), %r15
-
 	movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
 
 	/*
@@ -148,14 +167,14 @@ SYM_FUNC_START(__tdx_hypercall)
 	 * HLT operation indefinitely. Since this is the not the desired
 	 * result, conditionally call STI before TDCALL.
 	 */
-	testq $TDX_HCALL_ISSUE_STI, %rsi
+	testq $TDX_HCALL_ISSUE_STI, %rbp
 	jz .Lskip_sti
 	sti
 .Lskip_sti:
 	tdcall
 
 	/*
-	 * RAX==0 indicates a failure of the TDVMCALL mechanism itself and that
+	 * RAX!=0 indicates a failure of the TDVMCALL mechanism itself and that
 	 * something has gone horribly wrong with the TDX module.
 	 *
 	 * The return status of the hypercall operation is in a separate
@@ -165,30 +184,45 @@ SYM_FUNC_START(__tdx_hypercall)
 	testq %rax, %rax
 	jne .Lpanic
 
-	/* TDVMCALL leaf return code is in R10 */
-	movq %r10, %rax
+	pop %rax
 
 	/* Copy hypercall result registers to arg struct if needed */
-	testq $TDX_HCALL_HAS_OUTPUT, %rsi
+	testq $TDX_HCALL_HAS_OUTPUT, %rbp
 	jz .Lout
 
-	movq %r10, TDX_HYPERCALL_r10(%rdi)
-	movq %r11, TDX_HYPERCALL_r11(%rdi)
-	movq %r12, TDX_HYPERCALL_r12(%rdi)
-	movq %r13, TDX_HYPERCALL_r13(%rdi)
-	movq %r14, TDX_HYPERCALL_r14(%rdi)
-	movq %r15, TDX_HYPERCALL_r15(%rdi)
+	movq %r8,  TDX_HYPERCALL_r8(%rax)
+	movq %r9,  TDX_HYPERCALL_r9(%rax)
+	movq %r10, TDX_HYPERCALL_r10(%rax)
+	movq %r11, TDX_HYPERCALL_r11(%rax)
+	movq %r12, TDX_HYPERCALL_r12(%rax)
+	movq %r13, TDX_HYPERCALL_r13(%rax)
+	movq %r14, TDX_HYPERCALL_r14(%rax)
+	movq %r15, TDX_HYPERCALL_r15(%rax)
+	movq %rdi, TDX_HYPERCALL_rdi(%rax)
+	movq %rsi, TDX_HYPERCALL_rsi(%rax)
+	movq %rbx, TDX_HYPERCALL_rbx(%rax)
+	movq %rdx, TDX_HYPERCALL_rdx(%rax)
 .Lout:
+	/* TDVMCALL leaf return code is in R10 */
+	movq %r10, %rax
+
 	/*
 	 * Zero out registers exposed to the VMM to avoid speculative execution
 	 * with VMM-controlled values. This needs to include all registers
-	 * present in TDVMCALL_EXPOSE_REGS_MASK (except R12-R15). R12-R15
-	 * context will be restored.
+	 * present in TDVMCALL_EXPOSE_REGS_MASK, except RBX, and R12-R15 which
+	 * will be restored.
 	 */
+	xor %r8d,  %r8d
+	xor %r9d,  %r9d
 	xor %r10d, %r10d
 	xor %r11d, %r11d
+	xor %rdi,  %rdi
+	xor %rsi,  %rsi
+	xor %rdx,  %rdx
 
 	/* Restore callee-saved GPRs as mandated by the x86_64 ABI */
+	pop %rbp
+	pop %rbx
 	pop %r12
 	pop %r13
 	pop %r14
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index e53f26228fbb..8068faa52de1 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -22,12 +22,18 @@
  * This is a software only structure and not part of the TDX module/VMM ABI.
  */
 struct tdx_hypercall_args {
+	u64 r8;
+	u64 r9;
 	u64 r10;
 	u64 r11;
 	u64 r12;
 	u64 r13;
 	u64 r14;
 	u64 r15;
+	u64 rdi;
+	u64 rsi;
+	u64 rbx;
+	u64 rdx;
 };
 
 /* Used to request services from the VMM */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index a668a70abf32..3b6957d4d0ed 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -75,12 +75,18 @@ static void __used common(void)
 	OFFSET(TDX_MODULE_r11, tdx_module_output, r11);
 
 	BLANK();
+	OFFSET(TDX_HYPERCALL_r8,  tdx_hypercall_args, r8);
+	OFFSET(TDX_HYPERCALL_r9,  tdx_hypercall_args, r9);
 	OFFSET(TDX_HYPERCALL_r10, tdx_hypercall_args, r10);
 	OFFSET(TDX_HYPERCALL_r11, tdx_hypercall_args, r11);
 	OFFSET(TDX_HYPERCALL_r12, tdx_hypercall_args, r12);
 	OFFSET(TDX_HYPERCALL_r13, tdx_hypercall_args, r13);
 	OFFSET(TDX_HYPERCALL_r14, tdx_hypercall_args, r14);
 	OFFSET(TDX_HYPERCALL_r15, tdx_hypercall_args, r15);
+	OFFSET(TDX_HYPERCALL_rdi, tdx_hypercall_args, rdi);
+	OFFSET(TDX_HYPERCALL_rsi, tdx_hypercall_args, rsi);
+	OFFSET(TDX_HYPERCALL_rbx, tdx_hypercall_args, rbx);
+	OFFSET(TDX_HYPERCALL_rdx, tdx_hypercall_args, rdx);
 
 	BLANK();
 	OFFSET(BP_scratch, boot_params, scratch);
-- 
2.38.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-09 13:25 [PATCH 0/4] x86/tdx: Changes for TDX guest initialization Kirill A. Shutemov
  2022-12-09 13:25 ` [PATCH 1/4] x86/tdx: Expand __tdx_hypercall() to handle more arguments Kirill A. Shutemov
@ 2022-12-09 13:25 ` Kirill A. Shutemov
  2022-12-09 15:42   ` Sathyanarayanan Kuppuswamy
  2022-12-13 23:06   ` Dave Hansen
  2022-12-09 13:25 ` [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD Kirill A. Shutemov
  2022-12-09 13:25 ` [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES Kirill A. Shutemov
  3 siblings, 2 replies; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-09 13:25 UTC (permalink / raw)
  To: Dave Hansen, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel, Kirill A. Shutemov

The check for SEPT_VE_DISABLE happens early in the kernel boot, where
earlyprintk is not yet functional. The kernel successfully detects the
broken TD configuration and stops with panic(), but it cannot
communicate the reason to the user.

Use TDG.VP.VMCALL<ReportFatalError> to report the error. The hypercall
can encode a message of up to 64 bytes in eight registers.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/coco/tdx/tdx.c | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index cfd4c95b9f04..8ad04d101270 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -22,6 +22,7 @@
 
 /* TDX hypercall Leaf IDs */
 #define TDVMCALL_MAP_GPA		0x10001
+#define TDVMCALL_REPORT_FATAL_ERROR	0x10003
 
 /* MMIO direction */
 #define EPT_READ	0
@@ -140,6 +141,41 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
 }
 EXPORT_SYMBOL_GPL(tdx_mcall_get_report0);
 
+static void __noreturn tdx_panic(const char *msg)
+{
+	struct tdx_hypercall_args args = {
+		.r10 = TDX_HYPERCALL_STANDARD,
+		.r11 = TDVMCALL_REPORT_FATAL_ERROR,
+		.r12 = 0, /* Error code: 0 is Panic */
+	};
+	union {
+		/* Define register order according to the GHCI */
+		struct { u64 r14, r15, rbx, rdi, rsi, r8, r9, rdx; };
+
+		char str[64];
+	} message;
+
+	/* VMM assumes '\0' in byte 65, if the message took all 64 bytes */
+	strncpy(message.str, msg, 64);
+
+	args.r8  = message.r8;
+	args.r9  = message.r9;
+	args.r14 = message.r14;
+	args.r15 = message.r15;
+	args.rdi = message.rdi;
+	args.rsi = message.rsi;
+	args.rbx = message.rbx;
+	args.rdx = message.rdx;
+
+	/*
+	 * Keep calling the hypercall in case VMM did not terminate
+	 * the TD as it must.
+	 */
+	while (1) {
+		__tdx_hypercall(&args, 0);
+	}
+}
+
 static void tdx_parse_tdinfo(u64 *cc_mask)
 {
 	struct tdx_module_output out;
@@ -172,7 +208,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
 	 */
 	td_attr = out.rdx;
 	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
-		panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n");
+		tdx_panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.");
 }
 
 /*
-- 
2.38.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD
  2022-12-09 13:25 [PATCH 0/4] x86/tdx: Changes for TDX guest initialization Kirill A. Shutemov
  2022-12-09 13:25 ` [PATCH 1/4] x86/tdx: Expand __tdx_hypercall() to handle more arguments Kirill A. Shutemov
  2022-12-09 13:25 ` [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE Kirill A. Shutemov
@ 2022-12-09 13:25 ` Kirill A. Shutemov
  2022-12-09 15:45   ` Sathyanarayanan Kuppuswamy
  2022-12-13 23:13   ` Dave Hansen
  2022-12-09 13:25 ` [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES Kirill A. Shutemov
  3 siblings, 2 replies; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-09 13:25 UTC (permalink / raw)
  To: Dave Hansen, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel, Kirill A. Shutemov

The SEPT_VE_DISABLE check is required to keep the TD protected from VMM
attacks, but it makes it harder to debug guest kernel bugs. If the guest
touches unaccepted memory, the TD gets terminated without any trace of
what has happened.

Relax the SEPT_VE_DISABLE check to a warning on a debug TD, and panic()
in the #VE handler on an EPT violation on private memory. This produces
a useful backtrace.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/coco/tdx/tdx.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 8ad04d101270..0e47846ff8ff 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -38,6 +38,7 @@
 #define VE_GET_PORT_NUM(e)	((e) >> 16)
 #define VE_IS_IO_STRING(e)	((e) & BIT(4))
 
+#define ATTR_DEBUG		BIT(0)
 #define ATTR_SEPT_VE_DISABLE	BIT(28)
 
 /* TDX Module call error codes */
@@ -207,8 +208,15 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
 	 * TD-private memory.  Only VMM-shared memory (MMIO) will #VE.
 	 */
 	td_attr = out.rdx;
-	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
-		tdx_panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.");
+	if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
+		const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
+
+		/* Relax SEPT_VE_DISABLE check for debug TD. */
+		if (td_attr & ATTR_DEBUG)
+			pr_warn("%s\n", msg);
+		else
+			tdx_panic(msg);
+	}
 }
 
 /*
@@ -682,6 +690,8 @@ static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
 	case EXIT_REASON_CPUID:
 		return handle_cpuid(regs, ve);
 	case EXIT_REASON_EPT_VIOLATION:
+		if (ve->gpa != cc_mkdec(ve->gpa))
+			panic("Unexpected EPT-violation on private memory.");
 		return handle_mmio(regs, ve);
 	case EXIT_REASON_IO_INSTRUCTION:
 		return handle_io(regs, ve);
-- 
2.38.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES
  2022-12-09 13:25 [PATCH 0/4] x86/tdx: Changes for TDX guest initialization Kirill A. Shutemov
                   ` (2 preceding siblings ...)
  2022-12-09 13:25 ` [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD Kirill A. Shutemov
@ 2022-12-09 13:25 ` Kirill A. Shutemov
  2022-12-09 15:50   ` Sathyanarayanan Kuppuswamy
  2022-12-13 23:17   ` Dave Hansen
  3 siblings, 2 replies; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-09 13:25 UTC (permalink / raw)
  To: Dave Hansen, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel, Kirill A. Shutemov

== Background ==

There is a class of side-channel attacks against SGX enclaves called
"SGX Step"[1]. These attacks create lots of exceptions inside of
enclaves. Basically, run an in-enclave instruction, cause an exception.
Over and over.

There is a concern that a VMM could attack a TDX guest in the same way
by causing lots of #VE's. The TDX architecture includes new
countermeasures for these attacks. It basically counts the number of
exceptions and can send another *special* exception once the number of
VMM-induced #VE's hits a critical threshold[2].

== Problem ==

But, these special exceptions are independent of any action that the
guest takes. They can occur anywhere that the guest executes. This
includes sensitive areas like the entry code. The (non-paranoid) #VE
handler is incapable of handling exceptions in these areas.

== Solution ==

Fortunately, the special exceptions can be disabled by the guest via
a write to the NOTIFY_ENABLES TDCS field. NOTIFY_ENABLES is disabled by
default, but might be enabled by a bootloader, firmware or an earlier
kernel before the current kernel runs.

Disable NOTIFY_ENABLES feature explicitly and unconditionally. Any
NOTIFY_ENABLES-based #VE's that occur before this point will end up
in the early #VE exception handler and die due to unexpected exit
reason.

[1] https://github.com/jovanbulck/sgx-step
[2] https://intel.github.io/ccc-linux-guest-hardening-docs/security-spec.html#safety-against-ve-in-kernel-code

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/coco/tdx/tdx.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 0e47846ff8ff..c93c2fd2e113 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -19,6 +19,10 @@
 #define TDX_GET_VEINFO			3
 #define TDX_GET_REPORT			4
 #define TDX_ACCEPT_PAGE			6
+#define TDX_WR				8
+
+/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+#define TDCS_NOTIFY_ENABLES		0x9100000000000010
 
 /* TDX hypercall Leaf IDs */
 #define TDVMCALL_MAP_GPA		0x10001
@@ -858,6 +862,9 @@ void __init tdx_early_init(void)
 	tdx_parse_tdinfo(&cc_mask);
 	cc_set_mask(cc_mask);
 
+	/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
+	tdx_module_call(TDX_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL);
+
 	/*
 	 * All bits above GPA width are reserved and kernel treats shared bit
 	 * as flag, not as part of physical address.
-- 
2.38.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-09 13:25 ` [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE Kirill A. Shutemov
@ 2022-12-09 15:42   ` Sathyanarayanan Kuppuswamy
  2022-12-09 17:06     ` Kirill A. Shutemov
  2022-12-13 23:06   ` Dave Hansen
  1 sibling, 1 reply; 26+ messages in thread
From: Sathyanarayanan Kuppuswamy @ 2022-12-09 15:42 UTC (permalink / raw)
  To: Kirill A. Shutemov, Dave Hansen, Borislav Petkov, Andy Lutomirski
  Cc: Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel



On 12/9/22 5:25 AM, Kirill A. Shutemov wrote:
> The check for SEPT_VE_DISABLE happens early in the kernel boot where
> earlyprintk is not yet functional. Kernel successfully detect broken
> TD configuration and stops the kernel with panic(), but it cannot
> communicate the reason to the user.
> 
> Use TDG.VP.VMCALL<ReportFatalError> to report the error. The hypercall
> can encode message up to 64 bytes in eight registers.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/coco/tdx/tdx.c | 38 +++++++++++++++++++++++++++++++++++++-
>  1 file changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index cfd4c95b9f04..8ad04d101270 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -22,6 +22,7 @@
>  
>  /* TDX hypercall Leaf IDs */
>  #define TDVMCALL_MAP_GPA		0x10001
> +#define TDVMCALL_REPORT_FATAL_ERROR	0x10003
>  
>  /* MMIO direction */
>  #define EPT_READ	0
> @@ -140,6 +141,41 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
>  }
>  EXPORT_SYMBOL_GPL(tdx_mcall_get_report0);
>  
> +static void __noreturn tdx_panic(const char *msg)
> +{
> +	struct tdx_hypercall_args args = {
> +		.r10 = TDX_HYPERCALL_STANDARD,
> +		.r11 = TDVMCALL_REPORT_FATAL_ERROR,
> +		.r12 = 0, /* Error code: 0 is Panic */
> +	};
> +	union {
> +		/* Define register order according to the GHCI */
> +		struct { u64 r14, r15, rbx, rdi, rsi, r8, r9, rdx; };
> +
> +		char str[64];
> +	} message;
> +
> +	/* VMM assumes '\0' in byte 65, if the message took all 64 bytes */
> +	strncpy(message.str, msg, 64);
> +
> +	args.r8  = message.r8;
> +	args.r9  = message.r9;
> +	args.r14 = message.r14;
> +	args.r15 = message.r15;
> +	args.rdi = message.rdi;
> +	args.rsi = message.rsi;
> +	args.rbx = message.rbx;
> +	args.rdx = message.rdx;
> +
> +	/*
> +	 * Keep calling the hypercall in case VMM did not terminated
> +	 * the TD as it must.
> +	 */
> +	while (1) {
> +		__tdx_hypercall(&args, 0);
> +	}

Instead of an infinite loop, I'm wondering if the guest should panic after
retrying a few times.

> +}
> +
>  static void tdx_parse_tdinfo(u64 *cc_mask)
>  {
>  	struct tdx_module_output out;
> @@ -172,7 +208,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
>  	 */
>  	td_attr = out.rdx;
>  	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
> -		panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n");
> +		tdx_panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.");
>  }
>  
>  /*

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD
  2022-12-09 13:25 ` [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD Kirill A. Shutemov
@ 2022-12-09 15:45   ` Sathyanarayanan Kuppuswamy
  2022-12-09 17:08     ` Kirill A. Shutemov
  2022-12-13 23:13   ` Dave Hansen
  1 sibling, 1 reply; 26+ messages in thread
From: Sathyanarayanan Kuppuswamy @ 2022-12-09 15:45 UTC (permalink / raw)
  To: Kirill A. Shutemov, Dave Hansen, Borislav Petkov, Andy Lutomirski
  Cc: Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel



On 12/9/22 5:25 AM, Kirill A. Shutemov wrote:
> SEPT_VE_DISABLE check is required to keep the TD protected from VMM
> attacks, but it makes harder to debug guest kernel bugs. If guest
> touches unaccepted memory the TD will get terminated without any
> traces on what has happened.
> 
> Relax the SEPT_VE_DISABLE check to warning on debug TD and panic() in
> the #VE handler on EPT-violation on private memory. It will produce
> useful backtrace.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/coco/tdx/tdx.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 8ad04d101270..0e47846ff8ff 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -38,6 +38,7 @@
>  #define VE_GET_PORT_NUM(e)	((e) >> 16)
>  #define VE_IS_IO_STRING(e)	((e) & BIT(4))
>  
> +#define ATTR_DEBUG		BIT(0)
>  #define ATTR_SEPT_VE_DISABLE	BIT(28)
>  
>  /* TDX Module call error codes */
> @@ -207,8 +208,15 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
>  	 * TD-private memory.  Only VMM-shared memory (MMIO) will #VE.
>  	 */
>  	td_attr = out.rdx;
> -	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
> -		tdx_panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.");
> +	if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
> +		const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
> +
> +		/* Relax SEPT_VE_DISABLE check for debug TD. */
> +		if (td_attr & ATTR_DEBUG)
> +			pr_warn("%s\n", msg);
> +		else
> +			tdx_panic(msg);
> +	}
>  }
>  
>  /*
> @@ -682,6 +690,8 @@ static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
>  	case EXIT_REASON_CPUID:
>  		return handle_cpuid(regs, ve);
>  	case EXIT_REASON_EPT_VIOLATION:
> +		if (ve->gpa != cc_mkdec(ve->gpa))
> +			panic("Unexpected EPT-violation on private memory.");

Why add this change as part of the TD debug check? Should this be a separate patch?

>  		return handle_mmio(regs, ve);
>  	case EXIT_REASON_IO_INSTRUCTION:
>  		return handle_io(regs, ve);

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES
  2022-12-09 13:25 ` [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES Kirill A. Shutemov
@ 2022-12-09 15:50   ` Sathyanarayanan Kuppuswamy
  2022-12-09 17:10     ` Kirill A. Shutemov
  2022-12-13 23:17   ` Dave Hansen
  1 sibling, 1 reply; 26+ messages in thread
From: Sathyanarayanan Kuppuswamy @ 2022-12-09 15:50 UTC (permalink / raw)
  To: Kirill A. Shutemov, Dave Hansen, Borislav Petkov, Andy Lutomirski
  Cc: Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel



On 12/9/22 5:25 AM, Kirill A. Shutemov wrote:
> == Background ==
> 
> There is a class of side-channel attacks against SGX enclaves called
> "SGX Step"[1]. These attacks create lots of exceptions inside of
> enclaves. Basically, run an in-enclave instruction, cause an exception.
> Over and over.
> 
> There is a concern that a VMM could attack a TDX guest in the same way
> by causing lots of #VE's. The TDX architecture includes new
> countermeasures for these attacks. It basically counts the number of
> exceptions and can send another *special* exception once the number of
> VMM-induced #VE's hits a critical threshold[2].
> 
> == Problem ==
> 
> But, these special exceptions are independent of any action that the
> guest takes. They can occur anywhere that the guest executes. This
> includes sensitive areas like the entry code. The (non-paranoid) #VE
> handler is incapable of handling exceptions in these areas.
> 
> == Solution ==
> 
> Fortunately, the special exceptions can be disabled by the guest via
> write to NOTIFY_ENABLES TDCS field. NOTIFY_ENABLES is disabled by
> default, but might be enabled by a bootloader, firmware or an earlier
> kernel before the current kernel runs.
> 
> Disable NOTIFY_ENABLES feature explicitly and unconditionally. Any
> NOTIFY_ENABLES-based #VE's that occur before this point will end up
> in the early #VE exception handler and die due to unexpected exit
> reason.
> 
> [1] https://github.com/jovanbulck/sgx-step
> [2] https://intel.github.io/ccc-linux-guest-hardening-docs/security-spec.html#safety-against-ve-in-kernel-code
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---

I don't think you need to explicitly use section names (Background,
problem or solution) in the commit log. But it is up to you.

Rest looks good.


>  arch/x86/coco/tdx/tdx.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 0e47846ff8ff..c93c2fd2e113 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -19,6 +19,10 @@
>  #define TDX_GET_VEINFO			3
>  #define TDX_GET_REPORT			4
>  #define TDX_ACCEPT_PAGE			6
> +#define TDX_WR				8
> +
> +/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
> +#define TDCS_NOTIFY_ENABLES		0x9100000000000010
>  
>  /* TDX hypercall Leaf IDs */
>  #define TDVMCALL_MAP_GPA		0x10001
> @@ -858,6 +862,9 @@ void __init tdx_early_init(void)
>  	tdx_parse_tdinfo(&cc_mask);
>  	cc_set_mask(cc_mask);
>  
> +	/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
> +	tdx_module_call(TDX_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL);
> +
>  	/*
>  	 * All bits above GPA width are reserved and kernel treats shared bit
>  	 * as flag, not as part of physical address.

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-09 15:42   ` Sathyanarayanan Kuppuswamy
@ 2022-12-09 17:06     ` Kirill A. Shutemov
  2022-12-09 20:51       ` Sathyanarayanan Kuppuswamy
  0 siblings, 1 reply; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-09 17:06 UTC (permalink / raw)
  To: Sathyanarayanan Kuppuswamy
  Cc: Kirill A. Shutemov, Dave Hansen, Borislav Petkov,
	Andy Lutomirski, Thomas Gleixner, Elena Reshetova, x86,
	linux-coco, linux-kernel

On Fri, Dec 09, 2022 at 07:42:56AM -0800, Sathyanarayanan Kuppuswamy wrote:
> 
> 
> On 12/9/22 5:25 AM, Kirill A. Shutemov wrote:
> > The check for SEPT_VE_DISABLE happens early in the kernel boot where
> > earlyprintk is not yet functional. Kernel successfully detect broken
> > TD configuration and stops the kernel with panic(), but it cannot
> > communicate the reason to the user.
> > 
> > Use TDG.VP.VMCALL<ReportFatalError> to report the error. The hypercall
> > can encode message up to 64 bytes in eight registers.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> >  arch/x86/coco/tdx/tdx.c | 38 +++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 37 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> > index cfd4c95b9f04..8ad04d101270 100644
> > --- a/arch/x86/coco/tdx/tdx.c
> > +++ b/arch/x86/coco/tdx/tdx.c
> > @@ -22,6 +22,7 @@
> >  
> >  /* TDX hypercall Leaf IDs */
> >  #define TDVMCALL_MAP_GPA		0x10001
> > +#define TDVMCALL_REPORT_FATAL_ERROR	0x10003
> >  
> >  /* MMIO direction */
> >  #define EPT_READ	0
> > @@ -140,6 +141,41 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
> >  }
> >  EXPORT_SYMBOL_GPL(tdx_mcall_get_report0);
> >  
> > +static void __noreturn tdx_panic(const char *msg)
> > +{
> > +	struct tdx_hypercall_args args = {
> > +		.r10 = TDX_HYPERCALL_STANDARD,
> > +		.r11 = TDVMCALL_REPORT_FATAL_ERROR,
> > +		.r12 = 0, /* Error code: 0 is Panic */
> > +	};
> > +	union {
> > +		/* Define register order according to the GHCI */
> > +		struct { u64 r14, r15, rbx, rdi, rsi, r8, r9, rdx; };
> > +
> > +		char str[64];
> > +	} message;
> > +
> > +	/* VMM assumes '\0' in byte 65, if the message took all 64 bytes */
> > +	strncpy(message.str, msg, 64);
> > +
> > +	args.r8  = message.r8;
> > +	args.r9  = message.r9;
> > +	args.r14 = message.r14;
> > +	args.r15 = message.r15;
> > +	args.rdi = message.rdi;
> > +	args.rsi = message.rsi;
> > +	args.rbx = message.rbx;
> > +	args.rdx = message.rdx;
> > +
> > +	/*
> > +	 * Keep calling the hypercall in case VMM did not terminated
> > +	 * the TD as it must.
> > +	 */
> > +	while (1) {
> > +		__tdx_hypercall(&args, 0);
> > +	}
> 
> Instead of an infinite loop, I'm wondering if the guest should panic after
> retrying for few times.

Hm. What difference would it make?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD
  2022-12-09 15:45   ` Sathyanarayanan Kuppuswamy
@ 2022-12-09 17:08     ` Kirill A. Shutemov
  0 siblings, 0 replies; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-09 17:08 UTC (permalink / raw)
  To: Sathyanarayanan Kuppuswamy
  Cc: Kirill A. Shutemov, Dave Hansen, Borislav Petkov,
	Andy Lutomirski, Thomas Gleixner, Elena Reshetova, x86,
	linux-coco, linux-kernel

On Fri, Dec 09, 2022 at 07:45:34AM -0800, Sathyanarayanan Kuppuswamy wrote:
> 
> 
> On 12/9/22 5:25 AM, Kirill A. Shutemov wrote:
> > SEPT_VE_DISABLE check is required to keep the TD protected from VMM
> > attacks, but it makes harder to debug guest kernel bugs. If guest
> > touches unaccepted memory the TD will get terminated without any
> > traces on what has happened.
> > 
> > Relax the SEPT_VE_DISABLE check to warning on debug TD and panic() in
> > the #VE handler on EPT-violation on private memory. It will produce
> > useful backtrace.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> >  arch/x86/coco/tdx/tdx.c | 14 ++++++++++++--
> >  1 file changed, 12 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> > index 8ad04d101270..0e47846ff8ff 100644
> > --- a/arch/x86/coco/tdx/tdx.c
> > +++ b/arch/x86/coco/tdx/tdx.c
> > @@ -38,6 +38,7 @@
> >  #define VE_GET_PORT_NUM(e)	((e) >> 16)
> >  #define VE_IS_IO_STRING(e)	((e) & BIT(4))
> >  
> > +#define ATTR_DEBUG		BIT(0)
> >  #define ATTR_SEPT_VE_DISABLE	BIT(28)
> >  
> >  /* TDX Module call error codes */
> > @@ -207,8 +208,15 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
> >  	 * TD-private memory.  Only VMM-shared memory (MMIO) will #VE.
> >  	 */
> >  	td_attr = out.rdx;
> > -	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
> > -		tdx_panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.");
> > +	if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
> > +		const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
> > +
> > +		/* Relax SEPT_VE_DISABLE check for debug TD. */
> > +		if (td_attr & ATTR_DEBUG)
> > +			pr_warn("%s\n", msg);
> > +		else
> > +			tdx_panic(msg);
> > +	}
> >  }
> >  
> >  /*
> > @@ -682,6 +690,8 @@ static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
> >  	case EXIT_REASON_CPUID:
> >  		return handle_cpuid(regs, ve);
> >  	case EXIT_REASON_EPT_VIOLATION:
> > +		if (ve->gpa != cc_mkdec(ve->gpa))
> > +			panic("Unexpected EPT-violation on private memory.");
> 
> Why add this change part of TD debug check? Should this be a separate patch?

This code is never reachable if ATTR_SEPT_VE_DISABLE is set. And the panic
provides backtrace useful for debug.


> 
> >  		return handle_mmio(regs, ve);
> >  	case EXIT_REASON_IO_INSTRUCTION:
> >  		return handle_io(regs, ve);
> 
> -- 
> Sathyanarayanan Kuppuswamy
> Linux Kernel Developer

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES
  2022-12-09 15:50   ` Sathyanarayanan Kuppuswamy
@ 2022-12-09 17:10     ` Kirill A. Shutemov
  0 siblings, 0 replies; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-09 17:10 UTC (permalink / raw)
  To: Sathyanarayanan Kuppuswamy
  Cc: Kirill A. Shutemov, Dave Hansen, Borislav Petkov,
	Andy Lutomirski, Thomas Gleixner, Elena Reshetova, x86,
	linux-coco, linux-kernel

On Fri, Dec 09, 2022 at 07:50:46AM -0800, Sathyanarayanan Kuppuswamy wrote:
> 
> 
> On 12/9/22 5:25 AM, Kirill A. Shutemov wrote:
> > == Background ==
> > 
> > There is a class of side-channel attacks against SGX enclaves called
> > "SGX Step"[1]. These attacks create lots of exceptions inside of
> > enclaves. Basically, run an in-enclave instruction, cause an exception.
> > Over and over.
> > 
> > There is a concern that a VMM could attack a TDX guest in the same way
> > by causing lots of #VE's. The TDX architecture includes new
> > countermeasures for these attacks. It basically counts the number of
> > exceptions and can send another *special* exception once the number of
> > VMM-induced #VE's hits a critical threshold[2].
> > 
> > == Problem ==
> > 
> > But, these special exceptions are independent of any action that the
> > guest takes. They can occur anywhere that the guest executes. This
> > includes sensitive areas like the entry code. The (non-paranoid) #VE
> > handler is incapable of handling exceptions in these areas.
> > 
> > == Solution ==
> > 
> > Fortunately, the special exceptions can be disabled by the guest via
> > write to NOTIFY_ENABLES TDCS field. NOTIFY_ENABLES is disabled by
> > default, but might be enabled by a bootloader, firmware or an earlier
> > kernel before the current kernel runs.
> > 
> > Disable NOTIFY_ENABLES feature explicitly and unconditionally. Any
> > NOTIFY_ENABLES-based #VE's that occur before this point will end up
> > in the early #VE exception handler and die due to unexpected exit
> > reason.
> > 
> > [1] https://github.com/jovanbulck/sgx-step
> > [2] https://intel.github.io/ccc-linux-guest-hardening-docs/security-spec.html#safety-against-ve-in-kernel-code
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> 
> I don't think you need to explicitly use section names (Background,
> problem or solution) in the commit log. But it is up to you.
> 
> Rest looks good.
> 

I've checked git log and some people leave them in. I've decided to keep
them too.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-09 17:06     ` Kirill A. Shutemov
@ 2022-12-09 20:51       ` Sathyanarayanan Kuppuswamy
  2022-12-12 16:10         ` Dave Hansen
  0 siblings, 1 reply; 26+ messages in thread
From: Sathyanarayanan Kuppuswamy @ 2022-12-09 20:51 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Kirill A. Shutemov, Dave Hansen, Borislav Petkov,
	Andy Lutomirski, Thomas Gleixner, Elena Reshetova, x86,
	linux-coco, linux-kernel



On 12/9/22 9:06 AM, Kirill A. Shutemov wrote:
> On Fri, Dec 09, 2022 at 07:42:56AM -0800, Sathyanarayanan Kuppuswamy wrote:
>>
>>
>> On 12/9/22 5:25 AM, Kirill A. Shutemov wrote:
>>> The check for SEPT_VE_DISABLE happens early in the kernel boot where
>>> earlyprintk is not yet functional. Kernel successfully detect broken
>>> TD configuration and stops the kernel with panic(), but it cannot
>>> communicate the reason to the user.
>>>
>>> Use TDG.VP.VMCALL<ReportFatalError> to report the error. The hypercall
>>> can encode message up to 64 bytes in eight registers.
>>>
>>> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
>>> ---
>>>  arch/x86/coco/tdx/tdx.c | 38 +++++++++++++++++++++++++++++++++++++-
>>>  1 file changed, 37 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
>>> index cfd4c95b9f04..8ad04d101270 100644
>>> --- a/arch/x86/coco/tdx/tdx.c
>>> +++ b/arch/x86/coco/tdx/tdx.c
>>> @@ -22,6 +22,7 @@
>>>  
>>>  /* TDX hypercall Leaf IDs */
>>>  #define TDVMCALL_MAP_GPA		0x10001
>>> +#define TDVMCALL_REPORT_FATAL_ERROR	0x10003
>>>  
>>>  /* MMIO direction */
>>>  #define EPT_READ	0
>>> @@ -140,6 +141,41 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
>>>  }
>>>  EXPORT_SYMBOL_GPL(tdx_mcall_get_report0);
>>>  
>>> +static void __noreturn tdx_panic(const char *msg)
>>> +{
>>> +	struct tdx_hypercall_args args = {
>>> +		.r10 = TDX_HYPERCALL_STANDARD,
>>> +		.r11 = TDVMCALL_REPORT_FATAL_ERROR,
>>> +		.r12 = 0, /* Error code: 0 is Panic */
>>> +	};
>>> +	union {
>>> +		/* Define register order according to the GHCI */
>>> +		struct { u64 r14, r15, rbx, rdi, rsi, r8, r9, rdx; };
>>> +
>>> +		char str[64];
>>> +	} message;
>>> +
>>> +	/* VMM assumes '\0' in byte 65, if the message took all 64 bytes */
>>> +	strncpy(message.str, msg, 64);
>>> +
>>> +	args.r8  = message.r8;
>>> +	args.r9  = message.r9;
>>> +	args.r14 = message.r14;
>>> +	args.r15 = message.r15;
>>> +	args.rdi = message.rdi;
>>> +	args.rsi = message.rsi;
>>> +	args.rbx = message.rbx;
>>> +	args.rdx = message.rdx;
>>> +
>>> +	/*
>>> +	 * Keep calling the hypercall in case VMM did not terminated
>>> +	 * the TD as it must.
>>> +	 */
>>> +	while (1) {
>>> +		__tdx_hypercall(&args, 0);
>>> +	}
>>
>> Instead of an infinite loop, I'm wondering if the guest should panic after
>> retrying for few times.
> 
> Hm. What difference would it make?

IIUC, the goal of this patch is to report the fatal error to VMM and panic.
But, if VMM does not terminate the guest as we expect, rather than trying 
continuously, isn't it better to panic ourselves? That way the behavior
will be similar to what we have currently.

> 

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-09 20:51       ` Sathyanarayanan Kuppuswamy
@ 2022-12-12 16:10         ` Dave Hansen
  2022-12-12 16:37           ` Sathyanarayanan Kuppuswamy
  0 siblings, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2022-12-12 16:10 UTC (permalink / raw)
  To: Sathyanarayanan Kuppuswamy, Kirill A. Shutemov
  Cc: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski,
	Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel

On 12/9/22 12:51, Sathyanarayanan Kuppuswamy wrote:
>>>> +	while (1) {
>>>> +		__tdx_hypercall(&args, 0);
>>>> +	}
>>> Instead of an infinite loop, I'm wondering if the guest should panic after
>>> retrying for few times.
>> Hm. What difference would it make?
> IIUC, the goal of this patch is to report the fatal error to VMM and panic.
> But, if VMM does not terminate the guest as we expect, rather than trying 
> continuously, isn't it better to panic ourselves? That way the behavior
> will be similar to what we have currently.

What does "panic ourselves" mean exactly?  What is the current behavior
which that would match?


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-12 16:10         ` Dave Hansen
@ 2022-12-12 16:37           ` Sathyanarayanan Kuppuswamy
  2022-12-12 16:39             ` Dave Hansen
  0 siblings, 1 reply; 26+ messages in thread
From: Sathyanarayanan Kuppuswamy @ 2022-12-12 16:37 UTC (permalink / raw)
  To: Dave Hansen, Kirill A. Shutemov
  Cc: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski,
	Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel



On 12/12/22 8:10 AM, Dave Hansen wrote:
> On 12/9/22 12:51, Sathyanarayanan Kuppuswamy wrote:
>>>>> +	while (1) {
>>>>> +		__tdx_hypercall(&args, 0);
>>>>> +	}
>>>> Instead of an infinite loop, I'm wondering if the guest should panic after
>>>> retrying for few times.
>>> Hm. What difference would it make?
>> IIUC, the goal of this patch is to report the fatal error to VMM and panic.
>> But, if VMM does not terminate the guest as we expect, rather than trying 
>> continuously, isn't it better to panic ourselves? That way the behavior
>> will be similar to what we have currently.
> 
> What does "panic ourselves" mean exactly?  What is the current behavior
> which that would match?

I meant directly calling panic(). Before this patch, if the SEPT VE DISABLE
attribute was not set, we would call panic(). In this patch, we try to report
the error to VMM and wait for it to terminate the guest in the same case.
But after reporting the error, if VMM does not terminate the guest as expected,
I thought instead of retrying continuously, we can call panic() directly after
some retries. 


> 

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-12 16:37           ` Sathyanarayanan Kuppuswamy
@ 2022-12-12 16:39             ` Dave Hansen
  0 siblings, 0 replies; 26+ messages in thread
From: Dave Hansen @ 2022-12-12 16:39 UTC (permalink / raw)
  To: Sathyanarayanan Kuppuswamy, Kirill A. Shutemov
  Cc: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski,
	Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel

On 12/12/22 08:37, Sathyanarayanan Kuppuswamy wrote:
> On 12/12/22 8:10 AM, Dave Hansen wrote:
>> On 12/9/22 12:51, Sathyanarayanan Kuppuswamy wrote:
>>>>>> +	while (1) {
>>>>>> +		__tdx_hypercall(&args, 0);
>>>>>> +	}
>>>>> Instead of an infinite loop, I'm wondering if the guest should panic after
>>>>> retrying for few times.
>>>> Hm. What difference would it make?
>>> IIUC, the goal of this patch is to report the fatal error to VMM and panic.
>>> But, if VMM does not terminate the guest as we expect, rather than trying 
>>> continuously, isn't it better to panic ourselves? That way the behavior
>>> will be similar to what we have currently.
>> What does "panic ourselves" mean exactly?  What is the current behavior
>> which that would match?
> I meant directly calling panic(). Before this patch, if the SEPT VE DISABLE
> attribute was not set, we would call panic(). In this patch, we try to report
> the error to VMM and wait for it to terminate the guest in the same case.
> But after reporting the error, if VMM does not terminate the guest as expected,
> I thought instead of retrying continuously, we can call panic() directly after
> some retries. 

Could you explain how panic() is better than retrying?

You might also want to go look at the original changelog for this patch.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/4] x86/tdx: Expand __tdx_hypercall() to handle more arguments
  2022-12-09 13:25 ` [PATCH 1/4] x86/tdx: Expand __tdx_hypercall() to handle more arguments Kirill A. Shutemov
@ 2022-12-13 22:44   ` Dave Hansen
  0 siblings, 0 replies; 26+ messages in thread
From: Dave Hansen @ 2022-12-13 22:44 UTC (permalink / raw)
  To: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel

On 12/9/22 05:25, Kirill A. Shutemov wrote:
> So far __tdx_hypercall() only handles six arguments for VMCALL.
> Expanding it to six more register would allow to cover more use-cases.

like...

> Using RDI and RSI as VMCALL arguments requires more register shuffling.
> RAX is used to hold tdx_hypercall_args pointer and RBP stores flags.
> 
> While there, fix typo in the comment on panic branch.

This isn't the world's largest patch, but it is doing at least three or
four different logical things.  Ideally, you'd do the restructuring in a
couple of patches and then the last one would just add the six new
registers.

Could you take 20 minutes and see if you can break this down into three
or four patches?

It looks _ok_.  I'd almost ack it as-is, but it should be pretty simple
to break down.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-09 13:25 ` [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE Kirill A. Shutemov
  2022-12-09 15:42   ` Sathyanarayanan Kuppuswamy
@ 2022-12-13 23:06   ` Dave Hansen
  2022-12-15 17:12     ` Kirill A. Shutemov
  1 sibling, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2022-12-13 23:06 UTC (permalink / raw)
  To: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel

On 12/9/22 05:25, Kirill A. Shutemov wrote:
> The check for SEPT_VE_DISABLE happens early in the kernel boot where
> earlyprintk is not yet functional. Kernel successfully detect broken
> TD configuration and stops the kernel with panic(), but it cannot
> communicate the reason to the user.

Linux TDX guests require that the SEPT_VE_DISABLE "attribute" be set.
If it is not set, the kernel is theoretically required to handle
exceptions anywhere that kernel memory is accessed, including places
like NMI handlers and in the syscall entry gap.

Rather than even try to handle these exceptions, the kernel refuses to
run if SEPT_VE_DISABLE is unset.

However, the SEPT_VE_DISABLE detection and refusal code happens very
early in boot, even before earlyprintk runs.  Calling panic() will
effectively just hang the system.

Instead, call a TDX-specific panic() function.  This makes a very simple
TDVMCALL which gets a short error string out to the hypervisor without
any console infrastructure.

--

Is that better?

Also, are you sure we want to do this?  Is there any way to do this
inside of panic() itself to get panic() itself to call tdx_panic() and
get a short error message out to the hypervisor?

Getting *all* users of panic this magic ability would be a lot better
than giving it to one call-site of panic().

I'm all for making the panic() path as short and simple as possible, but
it would be nice if this fancy hypercall would get used in more than one
spot.

> Use TDG.VP.VMCALL<ReportFatalError> to report the error. The hypercall
> can encode message up to 64 bytes in eight registers.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/coco/tdx/tdx.c | 38 +++++++++++++++++++++++++++++++++++++-
>  1 file changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index cfd4c95b9f04..8ad04d101270 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -22,6 +22,7 @@
>  
>  /* TDX hypercall Leaf IDs */
>  #define TDVMCALL_MAP_GPA		0x10001
> +#define TDVMCALL_REPORT_FATAL_ERROR	0x10003
>  
>  /* MMIO direction */
>  #define EPT_READ	0
> @@ -140,6 +141,41 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
>  }
>  EXPORT_SYMBOL_GPL(tdx_mcall_get_report0);
>  
> +static void __noreturn tdx_panic(const char *msg)
> +{
> +	struct tdx_hypercall_args args = {
> +		.r10 = TDX_HYPERCALL_STANDARD,
> +		.r11 = TDVMCALL_REPORT_FATAL_ERROR,
> +		.r12 = 0, /* Error code: 0 is Panic */
> +	};
> +	union {
> +		/* Define register order according to the GHCI */
> +		struct { u64 r14, r15, rbx, rdi, rsi, r8, r9, rdx; };
> +
> +		char str[64];
> +	} message;
> +
> +	/* VMM assumes '\0' in byte 65, if the message took all 64 bytes */
> +	strncpy(message.str, msg, 64);
> +
> +	args.r8  = message.r8;
> +	args.r9  = message.r9;
> +	args.r14 = message.r14;
> +	args.r15 = message.r15;
> +	args.rdi = message.rdi;
> +	args.rsi = message.rsi;
> +	args.rbx = message.rbx;
> +	args.rdx = message.rdx;

I dunno.  Is that struct/union better, or would something like this be
more readable:

	args.r8  = *(u64 *)&message[48];
	args.r9  = *(u64 *)&message[56];

and just hard-code the offsets.

> +	/*
> +	 * Keep calling the hypercall in case VMM did not terminated

							terminate^

> +	 * the TD as it must.
> +	 */
> +	while (1) {
> +		__tdx_hypercall(&args, 0);
> +	}
> +}
> +
>  static void tdx_parse_tdinfo(u64 *cc_mask)
>  {
>  	struct tdx_module_output out;
> @@ -172,7 +208,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
>  	 */
>  	td_attr = out.rdx;
>  	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
> -		panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n");
> +		tdx_panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.");
>  }

Would it be worth making it more clear when the message is truncated?
Maybe something like:

	if (strlen(msg) > 64) {
		len = 64
		strncpy(&msg[61], "...", 3);
	}

I'm sure I have five off-by-one bugs in there, but you get the idea.
Can we stick a "..." at the end of things that get truncated?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD
  2022-12-09 13:25 ` [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD Kirill A. Shutemov
  2022-12-09 15:45   ` Sathyanarayanan Kuppuswamy
@ 2022-12-13 23:13   ` Dave Hansen
  2022-12-15 15:40     ` Kirill A. Shutemov
  1 sibling, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2022-12-13 23:13 UTC (permalink / raw)
  To: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel

On 12/9/22 05:25, Kirill A. Shutemov wrote:
> SEPT_VE_DISABLE check is required to keep the TD protected from VMM
> attacks, but it makes harder to debug guest kernel bugs. If guest
> touches unaccepted memory the TD will get terminated without any
> traces on what has happened.

This is a bit sparse.

--

A "SEPT #VE" occurs when a TDX guest touches memory that is not properly
mapped into the "secure EPT".  This can be the result of hypervisor
attacks or bugs, *OR* guest bugs.  Most notably, buggy guests might
touch unaccepted memory for lots of different memory safety bugs like
buffer overflows.

TDX guests do not want to continue in the face of hypervisor attacks or
hypervisor bugs.  They want to terminate as fast and safely as possible.
 SEPT_VE_DISABLE ensures that TDX guests *can't* continue in the face of
these kinds of issues.

But, that causes a problem.  TDX guests that can't continue can't spit
out oopses or other debugging info.  In essence SEPT_VE_DISABLE=1 guests
are not debuggable.  That's a problem.

--

Eh?

> Relax the SEPT_VE_DISABLE check to warning on debug TD and panic() in
> the #VE handler on EPT-violation on private memory. It will produce
> useful backtrace.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/coco/tdx/tdx.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 8ad04d101270..0e47846ff8ff 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -38,6 +38,7 @@
>  #define VE_GET_PORT_NUM(e)	((e) >> 16)
>  #define VE_IS_IO_STRING(e)	((e) & BIT(4))
>  
> +#define ATTR_DEBUG		BIT(0)
>  #define ATTR_SEPT_VE_DISABLE	BIT(28)
>  
>  /* TDX Module call error codes */
> @@ -207,8 +208,15 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
>  	 * TD-private memory.  Only VMM-shared memory (MMIO) will #VE.
>  	 */
>  	td_attr = out.rdx;
> -	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
> -		tdx_panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.");
> +	if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
> +		const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
> +
> +		/* Relax SEPT_VE_DISABLE check for debug TD. */
> +		if (td_attr & ATTR_DEBUG)
> +			pr_warn("%s\n", msg);
> +		else
> +			tdx_panic(msg);
> +	}
>  }
>  
>  /*
> @@ -682,6 +690,8 @@ static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
>  	case EXIT_REASON_CPUID:
>  		return handle_cpuid(regs, ve);
>  	case EXIT_REASON_EPT_VIOLATION:
> +		if (ve->gpa != cc_mkdec(ve->gpa))
> +			panic("Unexpected EPT-violation on private memory.");

What's the cc_mkdec() doing?


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES
  2022-12-09 13:25 ` [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES Kirill A. Shutemov
  2022-12-09 15:50   ` Sathyanarayanan Kuppuswamy
@ 2022-12-13 23:17   ` Dave Hansen
  1 sibling, 0 replies; 26+ messages in thread
From: Dave Hansen @ 2022-12-13 23:17 UTC (permalink / raw)
  To: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski
  Cc: Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel

On 12/9/22 05:25, Kirill A. Shutemov wrote:
> == Background ==
> 
> There is a class of side-channel attacks against SGX enclaves called
> "SGX Step"[1]. These attacks create lots of exceptions inside of
> enclaves. Basically, run an in-enclave instruction, cause an exception.
> Over and over.
> 
> There is a concern that a VMM could attack a TDX guest in the same way
> by causing lots of #VE's. The TDX architecture includes new
> countermeasures for these attacks. It basically counts the number of
> exceptions and can send another *special* exception once the number of
> VMM-induced #VE's hits a critical threshold[2].
> 
> == Problem ==
> 
> But, these special exceptions are independent of any action that the
> guest takes. They can occur anywhere that the guest executes. This
> includes sensitive areas like the entry code. The (non-paranoid) #VE
> handler is incapable of handling exceptions in these areas.
> 
> == Solution ==
> 
> Fortunately, the special exceptions can be disabled by the guest via
> write to NOTIFY_ENABLES TDCS field. NOTIFY_ENABLES is disabled by
> default, but might be enabled by a bootloader, firmware or an earlier
> kernel before the current kernel runs.
> 
> Disable NOTIFY_ENABLES feature explicitly and unconditionally. Any
> NOTIFY_ENABLES-based #VE's that occur before this point will end up
> in the early #VE exception handler and die due to unexpected exit
> reason.
> 
> [1] https://github.com/jovanbulck/sgx-step
> [2] https://intel.github.io/ccc-linux-guest-hardening-docs/security-spec.html#safety-against-ve-in-kernel-code
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Reviewed-by: Dave Hansen <dave.hansen@intel.com>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD
  2022-12-13 23:13   ` Dave Hansen
@ 2022-12-15 15:40     ` Kirill A. Shutemov
  0 siblings, 0 replies; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-15 15:40 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Borislav Petkov, Andy Lutomirski, Kuppuswamy Sathyanarayanan,
	Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel

On Tue, Dec 13, 2022 at 03:13:43PM -0800, Dave Hansen wrote:
> On 12/9/22 05:25, Kirill A. Shutemov wrote:
> > SEPT_VE_DISABLE check is required to keep the TD protected from VMM
> > attacks, but it makes harder to debug guest kernel bugs. If guest
> > touches unaccepted memory the TD will get terminated without any
> > traces on what has happened.
> 
> This is a bit sparse.
> 
> --
> 
> A "SEPT #VE" occurs when a TDX guest touches memory that is not properly
> mapped into the "secure EPT".  This can be the result of hypervisor
> attacks or bugs, *OR* guest bugs.  Most notably, buggy guests might
> touch unaccepted memory for lots of different memory safety bugs like
> buffer overflows.
> 
> TDX guests do not want to continue in the face of hypervisor attacks or
> hypervisor bugs.  They want to terminate as fast and safely as possible.
>  SEPT_VE_DISABLE ensures that TDX guests *can't* continue in the face of
> these kinds of issues.
> 
> But, that causes a problem.  TDX guests that can't continue can't spit
> out oopses or other debugging info.  In essence SEPT_VE_DISABLE=1 guests
> are not debuggable.  That's a problem.
> 
> --
> 
> Eh?

Thanks!

> > Relax the SEPT_VE_DISABLE check to warning on debug TD and panic() in
> > the #VE handler on EPT-violation on private memory. It will produce
> > useful backtrace.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> >  arch/x86/coco/tdx/tdx.c | 14 ++++++++++++--
> >  1 file changed, 12 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> > index 8ad04d101270..0e47846ff8ff 100644
> > --- a/arch/x86/coco/tdx/tdx.c
> > +++ b/arch/x86/coco/tdx/tdx.c
> > @@ -38,6 +38,7 @@
> >  #define VE_GET_PORT_NUM(e)	((e) >> 16)
> >  #define VE_IS_IO_STRING(e)	((e) & BIT(4))
> >  
> > +#define ATTR_DEBUG		BIT(0)
> >  #define ATTR_SEPT_VE_DISABLE	BIT(28)
> >  
> >  /* TDX Module call error codes */
> > @@ -207,8 +208,15 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
> >  	 * TD-private memory.  Only VMM-shared memory (MMIO) will #VE.
> >  	 */
> >  	td_attr = out.rdx;
> > -	if (!(td_attr & ATTR_SEPT_VE_DISABLE))
> > -		tdx_panic("TD misconfiguration: SEPT_VE_DISABLE attribute must be set.");
> > +	if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
> > +		const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
> > +
> > +		/* Relax SEPT_VE_DISABLE check for debug TD. */
> > +		if (td_attr & ATTR_DEBUG)
> > +			pr_warn("%s\n", msg);
> > +		else
> > +			tdx_panic(msg);
> > +	}
> >  }
> >  
> >  /*
> > @@ -682,6 +690,8 @@ static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
> >  	case EXIT_REASON_CPUID:
> >  		return handle_cpuid(regs, ve);
> >  	case EXIT_REASON_EPT_VIOLATION:
> > +		if (ve->gpa != cc_mkdec(ve->gpa))
> > +			panic("Unexpected EPT-violation on private memory.");
> 
> What's the cc_mkdec() doing?

It checks whether the GPA is private. I will move it to a helper, like this:

static inline bool is_private_gpa(u64 gpa)
{
	return gpa == cc_mkenc(gpa);
}

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-13 23:06   ` Dave Hansen
@ 2022-12-15 17:12     ` Kirill A. Shutemov
  2022-12-15 18:18       ` Dave Hansen
  0 siblings, 1 reply; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-15 17:12 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Borislav Petkov, Andy Lutomirski, Kuppuswamy Sathyanarayanan,
	Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel

On Tue, Dec 13, 2022 at 03:06:07PM -0800, Dave Hansen wrote:
> On 12/9/22 05:25, Kirill A. Shutemov wrote:
> > The check for SEPT_VE_DISABLE happens early in the kernel boot where
> > earlyprintk is not yet functional. Kernel successfully detect broken
> > TD configuration and stops the kernel with panic(), but it cannot
> > communicate the reason to the user.
> 
> Linux TDX guests require that the SEPT_VE_DISABLE "attribute" be set.
> If it is not set, the kernel is theoretically required to handle
> exceptions anywhere that kernel memory is accessed, including places
> like NMI handlers and in the syscall entry gap.
> 
> Rather than even try to handle these exceptions, the kernel refuses to
> run if SEPT_VE_DISABLE is unset.
> 
> However, the SEPT_VE_DISABLE detection and refusal code happens very
> early in boot, even before earlyprintk runs.  Calling panic() will
> effectively just hang the system.
> 
> Instead, call a TDX-specific panic() function.  This makes a very simple
> TDVMCALL which gets a short error string out to the hypervisor without
> any console infrastructure.
> 
> --
> 
> Is that better?

Yes, thank you.

> Also, are you sure we want to do this?  Is there any way to do this
> inside of panic() itself to get panic() itself to call tdx_panic() and
> get a short error message out to the hypervisor?
> 
> Getting *all* users of panic this magic ability would be a lot better
> than giving it to one call-site of panic().
> 
> I'm all for making the panic() path as short and simple as possible, but
> it would be nice if this fancy hypercall would get used in more than one
> spot.

Well, I don't see an obvious way to integrate this into panic().

There is panic_notifier_list and it kinda/sorta works, see the patch
below.

But it breaks panic_notifier_list contract: the callback will never return
and no other callback will be able to do their stuff. panic_timeout is
also broken.

So ReportFatalError() is no good for the task. And I don't have anything
else :/

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 83ca9a7f0b75..81f9a964dc1f 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -7,6 +7,7 @@
 #include <linux/cpufeature.h>
 #include <linux/export.h>
 #include <linux/io.h>
+#include <linux/panic_notifier.h>
 #include <asm/coco.h>
 #include <asm/tdx.h>
 #include <asm/vmx.h>
@@ -146,8 +147,10 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
 }
 EXPORT_SYMBOL_GPL(tdx_mcall_get_report0);
 
-static void __noreturn tdx_panic(const char *msg)
+static int tdx_panic(struct notifier_block *this,
+				 unsigned long event, void *ptr)
 {
+	const char *msg = ptr;
 	struct tdx_hypercall_args args = {
 		.r10 = TDX_HYPERCALL_STANDARD,
 		.r11 = TDVMCALL_REPORT_FATAL_ERROR,
@@ -219,7 +222,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
 		if (td_attr & ATTR_DEBUG)
 			pr_warn("%s\n", msg);
 		else
-			tdx_panic(msg);
+			panic(msg);
 	}
 }
 
@@ -851,6 +854,10 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
 	return true;
 }
 
+static struct notifier_block panic_block = {
+	.notifier_call = tdx_panic,
+};
+
 void __init tdx_early_init(void)
 {
 	u64 cc_mask;
@@ -863,6 +870,7 @@ void __init tdx_early_init(void)
 
 	setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
 
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 	cc_set_vendor(CC_VENDOR_INTEL);
 	tdx_parse_tdinfo(&cc_mask);
 	cc_set_mask(cc_mask);
-- 
  Kiryl Shutsemau / Kirill A. Shutemov

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-15 17:12     ` Kirill A. Shutemov
@ 2022-12-15 18:18       ` Dave Hansen
  2022-12-15 18:51         ` Kirill A. Shutemov
  0 siblings, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2022-12-15 18:18 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Borislav Petkov, Andy Lutomirski, Kuppuswamy Sathyanarayanan,
	Thomas Gleixner, Elena Reshetova, x86, linux-coco, linux-kernel

On 12/15/22 09:12, Kirill A. Shutemov wrote:
>> Getting *all* users of panic this magic ability would be a lot better
>> than giving it to one call-site of panic().
>>
>> I'm all for making the panic() path as short and simple as possible, but
>> it would be nice if this fancy hypercall would get used in more than one
>> spot.
> Well, I don't see an obvious way to integrate this into panic().
> 
> There is panic_notifier_list and it kinda/sorta works, see the patch
> below.
> 
> But it breaks panic_notifier_list contract: the callback will never return
> and no other callback will be able to do their stuff. panic_timeout is
> also broken.
> 
> So ReportFatalError() is no good for the task. And I don't have anything
> else :/

Do we *really* have to do a hard stop when SEPT_VE_DISABLE is missing?

Wouldn't it be simpler to just defer the check until we can spit out a
sane error message about it?

Or is there too much security exposure by continuing?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-15 18:18       ` Dave Hansen
@ 2022-12-15 18:51         ` Kirill A. Shutemov
  2022-12-15 21:09           ` Dave Hansen
  0 siblings, 1 reply; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-15 18:51 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski,
	Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel

On Thu, Dec 15, 2022 at 10:18:24AM -0800, Dave Hansen wrote:
> On 12/15/22 09:12, Kirill A. Shutemov wrote:
> >> Getting *all* users of panic this magic ability would be a lot better
> >> than giving it to one call-site of panic().
> >>
> >> I'm all for making the panic() path as short and simple as possible, but
> >> it would be nice if this fancy hypercall would get used in more than one
> >> spot.
> > Well, I don't see an obvious way to integrate this into panic().
> > 
> > There is panic_notifier_list and it kinda/sorta works, see the patch
> > below.
> > 
> > But it breaks panic_notifier_list contract: the callback will never return
> > and no other callback will be able to do their stuff. panic_timeout is
> > also broken.
> > 
> > So ReportFatalError() is no good for the task. And I don't have anything
> > else :/
> 
> Do we *really* have to do a hard stop when SEPT_VE_DISABLE is missing?
> 
> Wouldn't it be simpler to just defer the check until we can spit out a
> sane error message about it?
> 
> Or is there too much security exposure by continuing?

Well, I guess we can. We always have attestation as a backstop. No
sensitive user data has to be exposed to the TD before it has passed
attestation.

Do you prefer to have a separate initcall just to check SEPT_VE_DISABLE?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-15 18:51         ` Kirill A. Shutemov
@ 2022-12-15 21:09           ` Dave Hansen
  2022-12-16  2:38             ` Kirill A. Shutemov
  0 siblings, 1 reply; 26+ messages in thread
From: Dave Hansen @ 2022-12-15 21:09 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski,
	Kuppuswamy Sathyanarayanan, Thomas Gleixner, Elena Reshetova,
	x86, linux-coco, linux-kernel

On 12/15/22 10:51, Kirill A. Shutemov wrote:
>>> So ReportFatalError() is no good for the task. And I don't have anything
>>> else :/
>> Do we *really* have to do a hard stop when SEPT_VE_DISABLE is missing?
>>
>> Wouldn't it be simpler to just defer the check until we can spit out a
>> sane error message about it?
>>
>> Or is there too much security exposure by continuing?
> Well, I guess we can. We always have attestation as a backstop. No
> sensitive user data has to be exposed to the TD before it passed
> the attestation.

OK, so let's just pretend that SEPT_VE_DISABLE=0 is a blatant root hole
that lets the VMM compromise the TDX guest (I know it's not, but let's
just pretend it is).

The guest starts up, the VMM compromises it after the attestation has
run.  The now compromised guest sends along its report.  But, since the
report contains (or implies???) SEPT_VE_DISABLE=0, the guest will be
assumed to be compromised and won't get any secrets provisioned?

That assumes that the attestation service knows that SEPT_VE_DISABLE==0
plus Linux is bad.  Is that a good assumption?

> Do you prefer to have a separate initcall just to check SEPT_VE_DISABLE?

I don't feel strongly about where the check should be as long as it can
get a message out to the console.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-15 21:09           ` Dave Hansen
@ 2022-12-16  2:38             ` Kirill A. Shutemov
  2022-12-16 15:22               ` Reshetova, Elena
  0 siblings, 1 reply; 26+ messages in thread
From: Kirill A. Shutemov @ 2022-12-16  2:38 UTC (permalink / raw)
  To: Dave Hansen, Elena Reshetova
  Cc: Kirill A. Shutemov, Borislav Petkov, Andy Lutomirski,
	Kuppuswamy Sathyanarayanan, Thomas Gleixner, x86, linux-coco,
	linux-kernel

On Thu, Dec 15, 2022 at 01:09:10PM -0800, Dave Hansen wrote:
> On 12/15/22 10:51, Kirill A. Shutemov wrote:
> >>> So ReportFatalError() is no good for the task. And I don't have anything
> >>> else :/
> >> Do we *really* have to do a hard stop when SEPT_VE_DISABLE is missing?
> >>
> >> Wouldn't it be simpler to just defer the check until we can spit out a
> >> sane error message about it?
> >>
> >> Or is there too much security exposure by continuing?
> > Well, I guess we can. We always have attestation as a backstop. No
> > sensitive user data has to be exposed to the TD before it passed
> > the attestation.
> 
> OK, so let's just pretend that SEPT_VE_DISABLE=0 is a blatant root hole
> that lets the VMM compromise the TDX guest (I know it's not, but let's
> just pretend it is).
> 
> The guest starts up, the VMM compromises it after the attestation has
> run.  The now compromised guest send along its report.  But, since the
> report contains (or implies???) SEPT_VE_DISABLE=0, the guest will be
> assumed to be compromised and won't get any secrets provisioned?
> 
> That assumes that the attestation service knows that SEPT_VE_DISABLE==0
> plus Linux is bad.  Is that a good assumption?

I know that the attestation quote includes all required information
(attributes and kernel hash) to make the decision and I assume that
the attestation service is competent. So, yes, I think the expectation that
Linux + SEPT_VE_DISABLE==0 is going to be rejected is reasonable.

Elena, is there anything you can elaborate on here?

> > Do you prefer to have a separate initcall just to check SEPT_VE_DISABLE?
> 
> I don't feel strongly about where the check should be as long as it can
> get a message out to the console.

I would rather keep the current approach with a simple tdx_panic() for early
use if it works for you.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE
  2022-12-16  2:38             ` Kirill A. Shutemov
@ 2022-12-16 15:22               ` Reshetova, Elena
  0 siblings, 0 replies; 26+ messages in thread
From: Reshetova, Elena @ 2022-12-16 15:22 UTC (permalink / raw)
  To: Kirill A. Shutemov, Hansen, Dave
  Cc: Kirill A. Shutemov, Borislav Petkov, Lutomirski, Andy,
	Kuppuswamy Sathyanarayanan, Thomas Gleixner, x86, linux-coco,
	linux-kernel


> 
> On Thu, Dec 15, 2022 at 01:09:10PM -0800, Dave Hansen wrote:
> > On 12/15/22 10:51, Kirill A. Shutemov wrote:
> > >>> So ReportFatalError() is no good for the task. And I don't have anything
> > >>> else :/
> > >> Do we *really* have to do a hard stop when SEPT_VE_DISABLE is missing?
> > >>
> > >> Wouldn't it be simpler to just defer the check until we can spit out a
> > >> sane error message about it?
> > >>
> > >> Or is there too much security exposure by continuing?
> > > Well, I guess we can. We always have attestation as a backstop. No
> > > sensitive user data has to be exposed to the TD before it passed
> > > the attestation.
> >
> > OK, so let's just pretend that SEPT_VE_DISABLE=0 is a blatant root hole
> > that lets the VMM compromise the TDX guest (I know it's not, but let's
> > just pretend it is).
> >
> > The guest starts up, the VMM compromises it after the attestation has
> > run.  The now compromised guest send along its report.  But, since the
> > report contains (or implies???) SEPT_VE_DISABLE=0, the guest will be
> > assumed to be compromised and won't get any secrets provisioned?
> >
> > That assumes that the attestation service knows that SEPT_VE_DISABLE==0
> > plus Linux is bad.  Is that a good assumption?
> 
> I know that attestation quote includes all required information
> (attributes and kernel hash) to make the decision and I assume that
> attestation service is competent. So, yes, I think expectation Linux +
> SEPT_VE_DISABLE==0 going to be rejected is reasonable.
> 
> Elena, is there anything you can elaborate on here?

Yes, attestation quote has the attribute included for SEPT_VE_DISABLE.
So the remote verifier can check this, *if* it understands that it is important.
However, it is a big *IF* imo. In the TDX module spec and attestation specs,
SEPT_VE_DISABLE is marked as an attribute that "potentially impacts security"
vs TUD attributes like DEBUG that are classified as "your TD is not secure at all".
So, we will be relying on verifiers to understand that in the Linux case it is a
critical thing rather than a "potentially impacts security" thing.
We will document this specifically in our TDX guest kernel documentation,
but I have no guarantee of how carefully people will read it.
My preference is to do the right thing in code.

Best Regards,
Elena. 

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2022-12-16 15:22 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-09 13:25 [PATCH 0/4] x86/tdx: Changes for TDX guest initialization Kirill A. Shutemov
2022-12-09 13:25 ` [PATCH 1/4] x86/tdx: Expand __tdx_hypercall() to handle more arguments Kirill A. Shutemov
2022-12-13 22:44   ` Dave Hansen
2022-12-09 13:25 ` [PATCH 2/4] x86/tdx: Use ReportFatalError to report missing SEPT_VE_DISABLE Kirill A. Shutemov
2022-12-09 15:42   ` Sathyanarayanan Kuppuswamy
2022-12-09 17:06     ` Kirill A. Shutemov
2022-12-09 20:51       ` Sathyanarayanan Kuppuswamy
2022-12-12 16:10         ` Dave Hansen
2022-12-12 16:37           ` Sathyanarayanan Kuppuswamy
2022-12-12 16:39             ` Dave Hansen
2022-12-13 23:06   ` Dave Hansen
2022-12-15 17:12     ` Kirill A. Shutemov
2022-12-15 18:18       ` Dave Hansen
2022-12-15 18:51         ` Kirill A. Shutemov
2022-12-15 21:09           ` Dave Hansen
2022-12-16  2:38             ` Kirill A. Shutemov
2022-12-16 15:22               ` Reshetova, Elena
2022-12-09 13:25 ` [PATCH 3/4] x86/tdx: Relax SEPT_VE_DISABLE check for debug TD Kirill A. Shutemov
2022-12-09 15:45   ` Sathyanarayanan Kuppuswamy
2022-12-09 17:08     ` Kirill A. Shutemov
2022-12-13 23:13   ` Dave Hansen
2022-12-15 15:40     ` Kirill A. Shutemov
2022-12-09 13:25 ` [PATCH 4/4] x86/tdx: Disable NOTIFY_ENABLES Kirill A. Shutemov
2022-12-09 15:50   ` Sathyanarayanan Kuppuswamy
2022-12-09 17:10     ` Kirill A. Shutemov
2022-12-13 23:17   ` Dave Hansen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).