On 2020-11-04 15:54, Jarkko Sakkinen wrote: > From: Sean Christopherson > > Enclaves encounter exceptions for lots of reasons: everything from enclave > page faults to NULL pointer dereferences, to system calls that must be > “proxied” to the kernel from outside the enclave. > > In addition to the code contained inside an enclave, there is also > supporting code outside the enclave called an “SGX runtime”, which is > virtually always implemented inside a shared library. The runtime helps > build the enclave and handles things like *re*building the enclave if it > got destroyed by something like a suspend/resume cycle. > > The rebuilding has traditionally been handled in SIGSEGV handlers, > registered by the library. But, being process-wide, shared state, signal > handling and shared libraries do not mix well. > > Introduce a vDSO function call that wraps the enclave entry functions > (EENTER/ERESUME functions of the ENCLU instruction) and returns information > about any exceptions to the caller in the SGX runtime. > > Instead of generating a signal, the kernel places exception information in > RDI, RSI and RDX. The kernel-provided userspace portion of the vDSO handler > will place this information in a user-provided buffer or trigger a > user-provided callback at the time of the exception. > > The vDSO function calling convention uses the standard RDI, RSI, RDX, RCX, > R8 and R9 registers. This makes it possible to declare the vDSO as a C > prototype, but other than that there is no specific support for SystemV > ABI. Things like storing XSAVE are the responsibility of the enclave and > the runtime. 
> > Suggested-by: Andy Lutomirski > Acked-by: Jethro Beekman > Tested-by: Jethro Beekman > Signed-off-by: Sean Christopherson > Co-developed-by: Cedric Xing > Signed-off-by: Cedric Xing > Co-developed-by: Jarkko Sakkinen > Signed-off-by: Jarkko Sakkinen > --- > Changes from v39: > * Relayout out the user handler documentation: return values are described > in sgx_enclave_user_handler_t keneldoc and broad description is given > in struct sgx_enclave_run kerneldoc. > * Rename @leaf as @function, given that we want to speak consistently > about ENCLS and ENCLU functions. > * Reorder user_handler and user_data as the last fields in > sgx_enclave_run, as they are an extension to the basic functionality. > > arch/x86/entry/vdso/Makefile | 2 + > arch/x86/entry/vdso/vdso.lds.S | 1 + > arch/x86/entry/vdso/vsgx.S | 151 ++++++++++++++++++++++++++++++++ > arch/x86/include/asm/enclu.h | 9 ++ > arch/x86/include/uapi/asm/sgx.h | 91 +++++++++++++++++++ > 5 files changed, 254 insertions(+) > create mode 100644 arch/x86/entry/vdso/vsgx.S > create mode 100644 arch/x86/include/asm/enclu.h > > diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile > index 2ad757fb3c23..9915fbd34264 100644 > --- a/arch/x86/entry/vdso/Makefile > +++ b/arch/x86/entry/vdso/Makefile > @@ -27,6 +27,7 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y > vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o > vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o > vobjs32-y += vdso32/vclock_gettime.o > +vobjs-$(VDSO64-y) += vsgx.o > > # files to link into kernel > obj-y += vma.o extable.o > @@ -98,6 +99,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS > CFLAGS_REMOVE_vclock_gettime.o = -pg > CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg > CFLAGS_REMOVE_vgetcpu.o = -pg > +CFLAGS_REMOVE_vsgx.o = -pg > > # > # X32 processes use x32 vDSO to access 64bit kernel data. 
> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S > index 36b644e16272..4bf48462fca7 100644 > --- a/arch/x86/entry/vdso/vdso.lds.S > +++ b/arch/x86/entry/vdso/vdso.lds.S > @@ -27,6 +27,7 @@ VERSION { > __vdso_time; > clock_getres; > __vdso_clock_getres; > + __vdso_sgx_enter_enclave; > local: *; > }; > } > diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S > new file mode 100644 > index 000000000000..86a0e94f68df > --- /dev/null > +++ b/arch/x86/entry/vdso/vsgx.S > @@ -0,0 +1,151 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#include > +#include > +#include > +#include > + > +#include "extable.h" > + > +/* Relative to %rbp. */ > +#define SGX_ENCLAVE_OFFSET_OF_RUN 16 > + > +/* The offsets relative to struct sgx_enclave_run. */ > +#define SGX_ENCLAVE_RUN_TCS 0 > +#define SGX_ENCLAVE_RUN_LEAF 8 > +#define SGX_ENCLAVE_RUN_EXCEPTION_VECTOR 12 > +#define SGX_ENCLAVE_RUN_EXCEPTION_ERROR_CODE 14 > +#define SGX_ENCLAVE_RUN_EXCEPTION_ADDR 16 > +#define SGX_ENCLAVE_RUN_USER_HANDLER 24 > +#define SGX_ENCLAVE_RUN_USER_DATA 32 /* not used */ > +#define SGX_ENCLAVE_RUN_RESERVED_START 40 > +#define SGX_ENCLAVE_RUN_RESERVED_END 256 > + > +.code64 > +.section .text, "ax" > + > +SYM_FUNC_START(__vdso_sgx_enter_enclave) > + /* Prolog */ > + .cfi_startproc > + push %rbp > + .cfi_adjust_cfa_offset 8 > + .cfi_rel_offset %rbp, 0 > + mov %rsp, %rbp > + .cfi_def_cfa_register %rbp > + push %rbx > + .cfi_rel_offset %rbx, -8 > + > + mov %ecx, %eax > +.Lenter_enclave: > + /* EENTER <= function <= ERESUME */ > + cmp $EENTER, %eax > + jb .Linvalid_input > + cmp $ERESUME, %eax > + ja .Linvalid_input > + > + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rcx > + > + /* Validate that the reserved area contains only zeros. 
*/ > + mov $SGX_ENCLAVE_RUN_RESERVED_START, %rbx > +1: > + cmpq $0, (%rcx, %rbx) > + jne .Linvalid_input > + add $8, %rbx > + cmpq $SGX_ENCLAVE_RUN_RESERVED_END, %rbx > + jne 1b > + > + /* Load TCS and AEP */ > + mov SGX_ENCLAVE_RUN_TCS(%rcx), %rbx > + lea .Lasync_exit_pointer(%rip), %rcx > + > + /* Single ENCLU serving as both EENTER and AEP (ERESUME) */ > +.Lasync_exit_pointer: > +.Lenclu_eenter_eresume: > + enclu > + > + /* EEXIT jumps here unless the enclave is doing something fancy. */ > + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rbx > + > + /* Set exit_reason. */ > + movl $EEXIT, SGX_ENCLAVE_RUN_LEAF(%rbx) > + > + /* Invoke userspace's exit handler if one was provided. */ > +.Lhandle_exit: > + cmpq $0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx) > + jne .Linvoke_userspace_handler > + > + /* Success, in the sense that ENCLU was attempted. */ > + xor %eax, %eax > + > +.Lout: > + pop %rbx > + leave > + .cfi_def_cfa %rsp, 8 > + ret > + > + /* The out-of-line code runs with the pre-leave stack frame. */ > + .cfi_def_cfa %rbp, 16 > + > +.Linvalid_input: > + mov $(-EINVAL), %eax > + jmp .Lout > + > +.Lhandle_exception: > + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rbx > + > + /* Set the exception info. */ > + mov %eax, (SGX_ENCLAVE_RUN_LEAF)(%rbx) > + mov %di, (SGX_ENCLAVE_RUN_EXCEPTION_VECTOR)(%rbx) > + mov %si, (SGX_ENCLAVE_RUN_EXCEPTION_ERROR_CODE)(%rbx) > + mov %rdx, (SGX_ENCLAVE_RUN_EXCEPTION_ADDR)(%rbx) > + jmp .Lhandle_exit > + > +.Linvoke_userspace_handler: > + /* Pass the untrusted RSP (at exit) to the callback via %rcx. */ > + mov %rsp, %rcx > + > + /* Save struct sgx_enclave_exception %rbx is about to be clobbered. */ > + mov %rbx, %rax > + > + /* Save the untrusted RSP offset in %rbx (non-volatile register). */ > + mov %rsp, %rbx > + and $0xf, %rbx > + > + /* > + * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned > + * _after_ pushing the parameters on the stack, hence the bonus push. 
> + */ > + and $-0x10, %rsp > + push %rax > + > + /* Push struct sgx_enclave_exception as a param to the callback. */ > + push %rax > + > + /* Clear RFLAGS.DF per x86_64 ABI */ > + cld > + > + /* > + * Load the callback pointer to %rax and lfence for LVI (load value > + * injection) protection before making the call. > + */ > + mov SGX_ENCLAVE_RUN_USER_HANDLER(%rax), %rax > + lfence > + call *%rax > + > + /* Undo the post-exit %rsp adjustment. */ > + lea 0x10(%rsp, %rbx), %rsp > + > + /* > + * If the return from callback is zero or negative, return immediately, > + * else re-execute ENCLU with the postive return value interpreted as > + * the requested ENCLU function. > + */ > + cmp $0, %eax > + jle .Lout > + jmp .Lenter_enclave > + > + .cfi_endproc > + > +_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception) > + > +SYM_FUNC_END(__vdso_sgx_enter_enclave) > diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h > new file mode 100644 > index 000000000000..b1314e41a744 > --- /dev/null > +++ b/arch/x86/include/asm/enclu.h > @@ -0,0 +1,9 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _ASM_X86_ENCLU_H > +#define _ASM_X86_ENCLU_H > + > +#define EENTER 0x02 > +#define ERESUME 0x03 > +#define EEXIT 0x04 > + > +#endif /* _ASM_X86_ENCLU_H */ > diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h > index c32210235bf5..791e45334a4a 100644 > --- a/arch/x86/include/uapi/asm/sgx.h > +++ b/arch/x86/include/uapi/asm/sgx.h > @@ -74,4 +74,95 @@ struct sgx_enclave_provision { > __u64 fd; > }; > > +struct sgx_enclave_run; > + > +/** > + * typedef sgx_enclave_user_handler_t - Exit handler function accepted by > + * __vdso_sgx_enter_enclave() > + * @run: The run instance given by the caller > + * > + * The register parameters contain the snapshot of their values at enclave > + * exit. An invalid ENCLU function number will cause -EINVAL to be returned > + * to the caller. 
> + * > + * Return: > + * - <= 0: The given value is returned back to the caller. > + * - > 0: ENCLU function to invoke, either EENTER or ERESUME. > + */ > +typedef int (*sgx_enclave_user_handler_t)(long rdi, long rsi, long rdx, > + long rsp, long r8, long r9, > + struct sgx_enclave_run *run); > + > +/** > + * struct sgx_enclave_run - the execution context of __vdso_sgx_enter_enclave() > + * @tcs: TCS used to enter the enclave > + * @function: The last seen ENCLU function (EENTER, ERESUME or EEXIT) > + * @exception_vector: The interrupt vector of the exception > + * @exception_error_code: The exception error code pulled out of the stack > + * @exception_addr: The address that triggered the exception > + * @user_handler: User provided callback run on exception > + * @user_data: Data passed to the user handler > + * @reserved Reserved for future extensions > + * > + * If @user_handler is provided, the handler will be invoked on all return paths > + * of the normal flow. The user handler may transfer control, e.g. via a > + * longjmp() call or a C++ exception, without returning to > + * __vdso_sgx_enter_enclave(). > + */ > +struct sgx_enclave_run { > + __u64 tcs; > + __u32 function; > + __u16 exception_vector; > + __u16 exception_error_code; > + __u64 exception_addr; > + __u64 user_handler; > + __u64 user_data; > + __u8 reserved[216]; > +}; > + > +/** > + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(), > + * a vDSO function to enter an SGX enclave. > + * @rdi: Pass-through value for RDI > + * @rsi: Pass-through value for RSI > + * @rdx: Pass-through value for RDX > + * @function: ENCLU function, must be EENTER or ERESUME > + * @r8: Pass-through value for R8 > + * @r9: Pass-through value for R9 > + * @run: struct sgx_enclave_run, must be non-NULL > + * > + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the > + * x86-64 ABI, e.g. doesn't handle XSAVE state. 
Except for non-volatile > + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting > + * state in accordance with the x86-64 ABI is the responsibility of the enclave > + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C > + * code without careful consideration by both the enclave and its runtime. > + * > + * All general purpose registers except RAX, RBX and RCX are passed as-is to the > + * enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are loaded > + * with @function, @run.tcs, and asynchronous exit pointer respectively. > + * > + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the > + * pre-enclave state, e.g. to retrieve @run.exception and @run.user_handler > + * after an enclave exit. All other registers are available for use by the > + * enclave and its runtime, e.g. an enclave can push additional data onto the > + * stack (and modify RSP) to pass information to the optional user handler (see > + * below). > + * > + * Most exceptions reported on ENCLU, including those that occur within the > + * enclave, are fixed up and reported synchronously instead of being delivered > + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are > + * never fixed up and are always delivered via standard signals. On synchronously > + * reported exceptions, -EFAULT is returned This part about EFAULT is also bogus. -- Jethro Beekman | Fortanix