On 2020-07-07 05:37, Jarkko Sakkinen wrote: > From: Sean Christopherson > > An SGX runtime must be aware of the exceptions, which happen inside an > enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns > the CPU exception back to the caller exactly when it happens. > > Kernel fixups the exception information to RDI, RSI and RDX. The SGX call > vDSO handler fills this information to the user provided buffer or > alternatively trigger user provided callback at the time of the exception. > > The calling convention is custom and does not follow System V x86-64 ABI. > > Suggested-by: Andy Lutomirski > Acked-by: Jethro Beekman > Tested-by: Jethro Beekman > Signed-off-by: Sean Christopherson > Co-developed-by: Cedric Xing > Signed-off-by: Cedric Xing > Signed-off-by: Jarkko Sakkinen > --- > arch/x86/entry/vdso/Makefile | 2 + > arch/x86/entry/vdso/vdso.lds.S | 1 + > arch/x86/entry/vdso/vsgx_enter_enclave.S | 131 +++++++++++++++++++++++ > arch/x86/include/asm/enclu.h | 8 ++ > arch/x86/include/uapi/asm/sgx.h | 98 +++++++++++++++++ > 5 files changed, 240 insertions(+) > create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S > create mode 100644 arch/x86/include/asm/enclu.h > > diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile > index ebe82b7aecda..f71ad5ebd0c4 100644 > --- a/arch/x86/entry/vdso/Makefile > +++ b/arch/x86/entry/vdso/Makefile > @@ -29,6 +29,7 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y > vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o > vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o > vobjs32-y += vdso32/vclock_gettime.o > +vobjs-$(VDSO64-y) += vsgx_enter_enclave.o > > # files to link into kernel > obj-y += vma.o extable.o > @@ -100,6 +101,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS > CFLAGS_REMOVE_vclock_gettime.o = -pg > CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg > CFLAGS_REMOVE_vgetcpu.o = -pg > +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg > > # > # X32 
processes use x32 vDSO to access 64bit kernel data. > diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S > index 36b644e16272..4bf48462fca7 100644 > --- a/arch/x86/entry/vdso/vdso.lds.S > +++ b/arch/x86/entry/vdso/vdso.lds.S > @@ -27,6 +27,7 @@ VERSION { > __vdso_time; > clock_getres; > __vdso_clock_getres; > + __vdso_sgx_enter_enclave; > local: *; > }; > } > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > new file mode 100644 > index 000000000000..be7e467e1efb > --- /dev/null > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > @@ -0,0 +1,131 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#include > +#include > +#include > +#include > + > +#include "extable.h" > + > +#define EX_LEAF 0*8 > +#define EX_TRAPNR 0*8+4 > +#define EX_ERROR_CODE 0*8+6 > +#define EX_ADDRESS 1*8 > + > +.code64 > +.section .text, "ax" > + > +SYM_FUNC_START(__vdso_sgx_enter_enclave) > + /* Prolog */ > + .cfi_startproc > + push %rbp > + .cfi_adjust_cfa_offset 8 > + .cfi_rel_offset %rbp, 0 > + mov %rsp, %rbp > + .cfi_def_cfa_register %rbp > + push %rbx > + .cfi_rel_offset %rbx, -8 > + > + mov %ecx, %eax > +.Lenter_enclave: > + /* EENTER <= leaf <= ERESUME */ > + cmp $EENTER, %eax > + jb .Linvalid_leaf > + cmp $ERESUME, %eax > + ja .Linvalid_leaf > + > + /* Load TCS and AEP */ > + mov 0x10(%rbp), %rbx > + lea .Lasync_exit_pointer(%rip), %rcx > + > + /* Single ENCLU serving as both EENTER and AEP (ERESUME) */ > +.Lasync_exit_pointer: > +.Lenclu_eenter_eresume: > + enclu After thinking about this some more, I'd like to come back to this setup. Prior discussion at https://lkml.org/lkml/2018/11/2/597 . I hope I'm not derailing the discussion so much as to delay the patch set :( I previously mentioned “Userspace may want fine-grained control over enclave scheduling” as a reason userspace may want to specify a different AEP, but gave a bad example. 
Here's a better example: If I'm running my enclave in an M:N threading model (where M user threads run N TCSs, with N > M), an AEX is a good opportunity to switch contexts. Yes, I could implement this with alarm() or something similar, but that adds overhead while missing out on a lot of opportunities for context switching. -- Jethro Beekman | Fortanix