All of lore.kernel.org
 help / color / mirror / Atom feed
From: madvenka@linux.microsoft.com
To: kernel-hardening@lists.openwall.com, linux-api@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-fsdevel@vger.kernel.org, linux-integrity@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	linux-security-module@vger.kernel.org, oleg@redhat.com,
	x86@kernel.org, madvenka@linux.microsoft.com
Subject: [PATCH v1 2/4] [RFC] x86/trampfd: Provide support for the trampoline file descriptor
Date: Tue, 28 Jul 2020 08:10:48 -0500	[thread overview]
Message-ID: <20200728131050.24443-3-madvenka@linux.microsoft.com> (raw)
In-Reply-To: <20200728131050.24443-1-madvenka@linux.microsoft.com>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

Implement 32-bit and 64-bit X86 support for the trampoline file descriptor.

	- Define architecture specific register names
	- Handle the trampoline invocation page fault
	- Set up the user register context on trampoline invocation
	- Set up the user stack context on trampoline invocation

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl |   1 +
 arch/x86/include/uapi/asm/ptrace.h     |  38 +++
 arch/x86/kernel/Makefile               |   2 +
 arch/x86/kernel/trampfd.c              | 313 +++++++++++++++++++++++++
 arch/x86/mm/fault.c                    |  11 +
 6 files changed, 366 insertions(+)
 create mode 100644 arch/x86/kernel/trampfd.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d8f8a1a69ed1..77eb50414591 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -443,3 +443,4 @@
 437	i386	openat2			sys_openat2
 438	i386	pidfd_getfd		sys_pidfd_getfd
 439	i386	faccessat2		sys_faccessat2
+440	i386	trampfd_create		sys_trampfd_create
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e137..9d962de1d21f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -360,6 +360,7 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
+440	common	trampfd_create		sys_trampfd_create
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index 85165c0edafc..b031598f857e 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -9,6 +9,44 @@
 
 #ifndef __ASSEMBLY__
 
+/*
+ * These register names are to be used by 32-bit applications.
+ */
+enum reg_32_name {
+	x32_eax,
+	x32_ebx,
+	x32_ecx,
+	x32_edx,
+	x32_esi,
+	x32_edi,
+	x32_ebp,
+	x32_eip,
+	x32_max,
+};
+
+/*
+ * These register names are to be used by 64-bit applications.
+ */
+enum reg_64_name {
+	x64_rax = x32_max,
+	x64_rbx,
+	x64_rcx,
+	x64_rdx,
+	x64_rsi,
+	x64_rdi,
+	x64_rbp,
+	x64_r8,
+	x64_r9,
+	x64_r10,
+	x64_r11,
+	x64_r12,
+	x64_r13,
+	x64_r14,
+	x64_r15,
+	x64_rip,
+	x64_max,
+};
+
 #ifdef __i386__
 /* this struct defines the way the registers are stored on the
    stack during a system call. */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e77261db2391..5d968ac4c7d9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -157,3 +157,5 @@ ifeq ($(CONFIG_X86_64),y)
 endif
 
 obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)	+= ima_arch.o
+
+obj-$(CONFIG_TRAMPFD)			+= trampfd.o
diff --git a/arch/x86/kernel/trampfd.c b/arch/x86/kernel/trampfd.c
new file mode 100644
index 000000000000..f6b5507134d2
--- /dev/null
+++ b/arch/x86/kernel/trampfd.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - X86 support.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@linux.microsoft.com)
+ *
+ * Copyright (c) 2020, Microsoft Corporation.
+ */
+
+#include <linux/thread_info.h>
+#include <linux/mm_types.h>
+#include <linux/trampfd.h>
+#include <linux/uaccess.h>
+
+/* ---------------------------- Register Context ---------------------------- */
+
+static inline bool is_compat(void)
+{
+	return (IS_ENABLED(CONFIG_X86_32) ||
+		(IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_ADDR32)));
+}
+
+static void set_reg_32(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+	switch (name) {
+	case x32_eax:
+		pt_regs->ax = (unsigned long)value;
+		break;
+	case x32_ebx:
+		pt_regs->bx = (unsigned long)value;
+		break;
+	case x32_ecx:
+		pt_regs->cx = (unsigned long)value;
+		break;
+	case x32_edx:
+		pt_regs->dx = (unsigned long)value;
+		break;
+	case x32_esi:
+		pt_regs->si = (unsigned long)value;
+		break;
+	case x32_edi:
+		pt_regs->di = (unsigned long)value;
+		break;
+	case x32_ebp:
+		pt_regs->bp = (unsigned long)value;
+		break;
+	case x32_eip:
+		pt_regs->ip = (unsigned long)value;
+		break;
+	default:
+		WARN(1, "%s: Illegal register name %d\n", __func__, name);
+		break;
+	}
+}
+
+#ifdef __i386__
+
+static void set_reg_64(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+}
+
+#else
+
+static void set_reg_64(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+	switch (name) {
+	case x64_rax:
+		pt_regs->ax = (unsigned long)value;
+		break;
+	case x64_rbx:
+		pt_regs->bx = (unsigned long)value;
+		break;
+	case x64_rcx:
+		pt_regs->cx = (unsigned long)value;
+		break;
+	case x64_rdx:
+		pt_regs->dx = (unsigned long)value;
+		break;
+	case x64_rsi:
+		pt_regs->si = (unsigned long)value;
+		break;
+	case x64_rdi:
+		pt_regs->di = (unsigned long)value;
+		break;
+	case x64_rbp:
+		pt_regs->bp = (unsigned long)value;
+		break;
+	case x64_r8:
+		pt_regs->r8 = (unsigned long)value;
+		break;
+	case x64_r9:
+		pt_regs->r9 = (unsigned long)value;
+		break;
+	case x64_r10:
+		pt_regs->r10 = (unsigned long)value;
+		break;
+	case x64_r11:
+		pt_regs->r11 = (unsigned long)value;
+		break;
+	case x64_r12:
+		pt_regs->r12 = (unsigned long)value;
+		break;
+	case x64_r13:
+		pt_regs->r13 = (unsigned long)value;
+		break;
+	case x64_r14:
+		pt_regs->r14 = (unsigned long)value;
+		break;
+	case x64_r15:
+		pt_regs->r15 = (unsigned long)value;
+		break;
+	case x64_rip:
+		pt_regs->ip = (unsigned long)value;
+		break;
+	default:
+		WARN(1, "%s: Illegal register name %d\n", __func__, name);
+		break;
+	}
+}
+
+#endif /* __i386__ */
+
+static void set_regs(struct pt_regs *pt_regs, struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	bool			compat = is_compat();
+
+	for (; reg < reg_end; reg++) {
+		if (compat)
+			set_reg_32(pt_regs, reg->name, reg->value);
+		else
+			set_reg_64(pt_regs, reg->name, reg->value);
+	}
+}
+
+/*
+ * Check if the register names are valid. Check if the user PC has been set.
+ */
+bool trampfd_valid_regs(struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	int			min, max, pc_name;
+	bool			pc_set = false;
+
+	if (is_compat()) {
+		min = 0;
+		pc_name = x32_eip;
+		max = x32_max;
+	} else {
+		min = x32_max;
+		pc_name = x64_rip;
+		max = x64_max;
+	}
+
+	for (; reg < reg_end; reg++) {
+		if (reg->name < min || reg->name >= max || reg->reserved)
+			return false;
+		if (reg->name == pc_name && reg->value)
+			pc_set = true;
+	}
+	return pc_set;
+}
+EXPORT_SYMBOL_GPL(trampfd_valid_regs);
+
+/*
+ * Check if the PC specified in a register context is allowed.
+ */
+bool trampfd_allowed_pc(struct trampfd *trampfd, struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	struct trampfd_values	*allowed_pcs = trampfd->allowed_pcs;
+	u64			*allowed_values, pc_value = 0;
+	u32			nvalues, pc_name;
+	int			i;
+
+	if (!allowed_pcs)
+		return true;
+
+	pc_name = is_compat() ? x32_eip : x64_rip;
+
+	/*
+	 * Find the PC register and its value. If the PC register has been
+	 * specified multiple times, only the last one counts.
+	 */
+	for (; reg < reg_end; reg++) {
+		if (reg->name == pc_name)
+			pc_value = reg->value;
+	}
+
+	allowed_values = allowed_pcs->values;
+	nvalues = allowed_pcs->nvalues;
+
+	for (i = 0; i < nvalues; i++) {
+		if (pc_value == allowed_values[i])
+			return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(trampfd_allowed_pc);
+
+/* ---------------------------- Stack Context ---------------------------- */
+
+static int push_data(struct pt_regs *pt_regs, struct trampfd_stack *tstack)
+{
+	unsigned long	sp;
+
+	sp = user_stack_pointer(pt_regs) - tstack->size - tstack->offset;
+	if (tstack->flags & TRAMPFD_SET_SP) {
+		if (is_compat())
+			sp = ((sp + 4) & -16ul) - 4;
+		else
+			sp = round_down(sp, 16) - 8;
+	}
+
+	if (!access_ok(sp, user_stack_pointer(pt_regs) - sp))
+		return -EFAULT;
+
+	if (copy_to_user(USERPTR(sp), tstack->data, tstack->size))
+		return -EFAULT;
+
+	if (tstack->flags & TRAMPFD_SET_SP)
+		user_stack_pointer_set(pt_regs, sp);
+
+	return 0;
+}
+
+/* ---------------------------- Fault Handlers ---------------------------- */
+
+static int trampfd_user_fault(struct trampfd *trampfd,
+			      struct vm_area_struct *vma,
+			      struct pt_regs *pt_regs)
+{
+	char			buf[TRAMPFD_MAX_STACK_SIZE];
+	struct trampfd_regs	*tregs;
+	struct trampfd_stack	*tstack = NULL;
+	unsigned long		addr;
+	size_t			size;
+	int			rc = 0;
+
+	mutex_lock(&trampfd->lock);
+
+	/*
+	 * Execution of the trampoline must start at the offset specified by
+	 * the kernel.
+	 */
+	addr = vma->vm_start + trampfd->map.ioffset;
+	if (addr != pt_regs->ip) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * At a minimum, the user PC register must be specified for a
+	 * user trampoline.
+	 */
+	tregs = trampfd->regs;
+	if (!tregs) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * Set the register context for the trampoline.
+	 */
+	set_regs(pt_regs, tregs);
+
+	if (trampfd->stack) {
+		/*
+		 * Copy the stack context into a local buffer and push stack
+		 * data after dropping the lock.
+		 */
+		size = sizeof(*trampfd->stack) + trampfd->stack->size;
+		tstack = (struct trampfd_stack *) buf;
+		memcpy(tstack, trampfd->stack, size);
+	}
+unlock:
+	mutex_unlock(&trampfd->lock);
+
+	if (!rc && tstack) {
+		mmap_read_unlock(vma->vm_mm);
+		rc = push_data(pt_regs, tstack);
+		mmap_read_lock(vma->vm_mm);
+	}
+	return rc;
+}
+
+/*
+ * Handle it if it is a trampoline fault.
+ */
+bool trampfd_fault(struct vm_area_struct *vma, struct pt_regs *pt_regs)
+{
+	struct trampfd		*trampfd;
+
+	if (!is_trampfd_vma(vma))
+		return false;
+	trampfd = vma->vm_private_data;
+
+	if (trampfd->type == TRAMPFD_USER)
+		return !trampfd_user_fault(trampfd, vma, pt_regs);
+	return false;
+}
+EXPORT_SYMBOL_GPL(trampfd_fault);
+
+/* ------------------------- Arch Initialization ------------------------- */
+
+int trampfd_check_arch(struct trampfd *trampfd)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(trampfd_check_arch);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1ead568c0101..a1432ee2a1a2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 #include <linux/efi.h>			/* efi_recover_from_page_fault()*/
 #include <linux/mm_types.h>
+#include <linux/trampfd.h>		/* trampoline invocation */
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -1142,6 +1143,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	struct mm_struct *mm;
 	vm_fault_t fault, major = 0;
 	unsigned int flags = FAULT_FLAG_DEFAULT;
+	unsigned long tflags = X86_PF_INSTR | X86_PF_USER;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1275,6 +1277,15 @@ void do_user_addr_fault(struct pt_regs *regs,
 	 */
 good_area:
 	if (unlikely(access_error(hw_error_code, vma))) {
+		/*
+		 * If it is a user execute fault, it could be a trampoline
+		 * invocation.
+		 */
+		if ((hw_error_code & tflags) == tflags &&
+		    trampfd_fault(vma, regs)) {
+			mmap_read_unlock(mm);
+			return;
+		}
 		bad_area_access_error(regs, hw_error_code, address, vma);
 		return;
 	}
-- 
2.17.1


WARNING: multiple messages have this Message-ID (diff)
From: madvenka@linux.microsoft.com
To: kernel-hardening@lists.openwall.com, linux-api@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-fsdevel@vger.kernel.org, linux-integrity@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	linux-security-module@vger.kernel.org, oleg@redhat.com,
	x86@kernel.org, madvenka@linux.microsoft.com
Subject: [PATCH v1 2/4] [RFC] x86/trampfd: Provide support for the trampoline file descriptor
Date: Tue, 28 Jul 2020 08:10:48 -0500	[thread overview]
Message-ID: <20200728131050.24443-3-madvenka@linux.microsoft.com> (raw)
In-Reply-To: <20200728131050.24443-1-madvenka@linux.microsoft.com>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

Implement 32-bit and 64-bit X86 support for the trampoline file descriptor.

	- Define architecture specific register names
	- Handle the trampoline invocation page fault
	- Set up the user register context on trampoline invocation
	- Set up the user stack context on trampoline invocation

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl |   1 +
 arch/x86/include/uapi/asm/ptrace.h     |  38 +++
 arch/x86/kernel/Makefile               |   2 +
 arch/x86/kernel/trampfd.c              | 313 +++++++++++++++++++++++++
 arch/x86/mm/fault.c                    |  11 +
 6 files changed, 366 insertions(+)
 create mode 100644 arch/x86/kernel/trampfd.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d8f8a1a69ed1..77eb50414591 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -443,3 +443,4 @@
 437	i386	openat2			sys_openat2
 438	i386	pidfd_getfd		sys_pidfd_getfd
 439	i386	faccessat2		sys_faccessat2
+440	i386	trampfd_create		sys_trampfd_create
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e137..9d962de1d21f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -360,6 +360,7 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
+440	common	trampfd_create		sys_trampfd_create
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index 85165c0edafc..b031598f857e 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -9,6 +9,44 @@
 
 #ifndef __ASSEMBLY__
 
+/*
+ * These register names are to be used by 32-bit applications.
+ */
+enum reg_32_name {
+	x32_eax,
+	x32_ebx,
+	x32_ecx,
+	x32_edx,
+	x32_esi,
+	x32_edi,
+	x32_ebp,
+	x32_eip,
+	x32_max,
+};
+
+/*
+ * These register names are to be used by 64-bit applications.
+ */
+enum reg_64_name {
+	x64_rax = x32_max,
+	x64_rbx,
+	x64_rcx,
+	x64_rdx,
+	x64_rsi,
+	x64_rdi,
+	x64_rbp,
+	x64_r8,
+	x64_r9,
+	x64_r10,
+	x64_r11,
+	x64_r12,
+	x64_r13,
+	x64_r14,
+	x64_r15,
+	x64_rip,
+	x64_max,
+};
+
 #ifdef __i386__
 /* this struct defines the way the registers are stored on the
    stack during a system call. */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e77261db2391..5d968ac4c7d9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -157,3 +157,5 @@ ifeq ($(CONFIG_X86_64),y)
 endif
 
 obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)	+= ima_arch.o
+
+obj-$(CONFIG_TRAMPFD)			+= trampfd.o
diff --git a/arch/x86/kernel/trampfd.c b/arch/x86/kernel/trampfd.c
new file mode 100644
index 000000000000..f6b5507134d2
--- /dev/null
+++ b/arch/x86/kernel/trampfd.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - X86 support.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@linux.microsoft.com)
+ *
+ * Copyright (c) 2020, Microsoft Corporation.
+ */
+
+#include <linux/thread_info.h>
+#include <linux/mm_types.h>
+#include <linux/trampfd.h>
+#include <linux/uaccess.h>
+
+/* ---------------------------- Register Context ---------------------------- */
+
+static inline bool is_compat(void)
+{
+	return (IS_ENABLED(CONFIG_X86_32) ||
+		(IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_ADDR32)));
+}
+
+static void set_reg_32(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+	switch (name) {
+	case x32_eax:
+		pt_regs->ax = (unsigned long)value;
+		break;
+	case x32_ebx:
+		pt_regs->bx = (unsigned long)value;
+		break;
+	case x32_ecx:
+		pt_regs->cx = (unsigned long)value;
+		break;
+	case x32_edx:
+		pt_regs->dx = (unsigned long)value;
+		break;
+	case x32_esi:
+		pt_regs->si = (unsigned long)value;
+		break;
+	case x32_edi:
+		pt_regs->di = (unsigned long)value;
+		break;
+	case x32_ebp:
+		pt_regs->bp = (unsigned long)value;
+		break;
+	case x32_eip:
+		pt_regs->ip = (unsigned long)value;
+		break;
+	default:
+		WARN(1, "%s: Illegal register name %d\n", __func__, name);
+		break;
+	}
+}
+
+#ifdef __i386__
+
+static void set_reg_64(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+}
+
+#else
+
+static void set_reg_64(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+	switch (name) {
+	case x64_rax:
+		pt_regs->ax = (unsigned long)value;
+		break;
+	case x64_rbx:
+		pt_regs->bx = (unsigned long)value;
+		break;
+	case x64_rcx:
+		pt_regs->cx = (unsigned long)value;
+		break;
+	case x64_rdx:
+		pt_regs->dx = (unsigned long)value;
+		break;
+	case x64_rsi:
+		pt_regs->si = (unsigned long)value;
+		break;
+	case x64_rdi:
+		pt_regs->di = (unsigned long)value;
+		break;
+	case x64_rbp:
+		pt_regs->bp = (unsigned long)value;
+		break;
+	case x64_r8:
+		pt_regs->r8 = (unsigned long)value;
+		break;
+	case x64_r9:
+		pt_regs->r9 = (unsigned long)value;
+		break;
+	case x64_r10:
+		pt_regs->r10 = (unsigned long)value;
+		break;
+	case x64_r11:
+		pt_regs->r11 = (unsigned long)value;
+		break;
+	case x64_r12:
+		pt_regs->r12 = (unsigned long)value;
+		break;
+	case x64_r13:
+		pt_regs->r13 = (unsigned long)value;
+		break;
+	case x64_r14:
+		pt_regs->r14 = (unsigned long)value;
+		break;
+	case x64_r15:
+		pt_regs->r15 = (unsigned long)value;
+		break;
+	case x64_rip:
+		pt_regs->ip = (unsigned long)value;
+		break;
+	default:
+		WARN(1, "%s: Illegal register name %d\n", __func__, name);
+		break;
+	}
+}
+
+#endif /* __i386__ */
+
+static void set_regs(struct pt_regs *pt_regs, struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	bool			compat = is_compat();
+
+	for (; reg < reg_end; reg++) {
+		if (compat)
+			set_reg_32(pt_regs, reg->name, reg->value);
+		else
+			set_reg_64(pt_regs, reg->name, reg->value);
+	}
+}
+
+/*
+ * Check if the register names are valid. Check if the user PC has been set.
+ */
+bool trampfd_valid_regs(struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	int			min, max, pc_name;
+	bool			pc_set = false;
+
+	if (is_compat()) {
+		min = 0;
+		pc_name = x32_eip;
+		max = x32_max;
+	} else {
+		min = x32_max;
+		pc_name = x64_rip;
+		max = x64_max;
+	}
+
+	for (; reg < reg_end; reg++) {
+		if (reg->name < min || reg->name >= max || reg->reserved)
+			return false;
+		if (reg->name == pc_name && reg->value)
+			pc_set = true;
+	}
+	return pc_set;
+}
+EXPORT_SYMBOL_GPL(trampfd_valid_regs);
+
+/*
+ * Check if the PC specified in a register context is allowed.
+ */
+bool trampfd_allowed_pc(struct trampfd *trampfd, struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	struct trampfd_values	*allowed_pcs = trampfd->allowed_pcs;
+	u64			*allowed_values, pc_value = 0;
+	u32			nvalues, pc_name;
+	int			i;
+
+	if (!allowed_pcs)
+		return true;
+
+	pc_name = is_compat() ? x32_eip : x64_rip;
+
+	/*
+	 * Find the PC register and its value. If the PC register has been
+	 * specified multiple times, only the last one counts.
+	 */
+	for (; reg < reg_end; reg++) {
+		if (reg->name == pc_name)
+			pc_value = reg->value;
+	}
+
+	allowed_values = allowed_pcs->values;
+	nvalues = allowed_pcs->nvalues;
+
+	for (i = 0; i < nvalues; i++) {
+		if (pc_value == allowed_values[i])
+			return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(trampfd_allowed_pc);
+
+/* ---------------------------- Stack Context ---------------------------- */
+
+static int push_data(struct pt_regs *pt_regs, struct trampfd_stack *tstack)
+{
+	unsigned long	sp;
+
+	sp = user_stack_pointer(pt_regs) - tstack->size - tstack->offset;
+	if (tstack->flags & TRAMPFD_SET_SP) {
+		if (is_compat())
+			sp = ((sp + 4) & -16ul) - 4;
+		else
+			sp = round_down(sp, 16) - 8;
+	}
+
+	if (!access_ok(sp, user_stack_pointer(pt_regs) - sp))
+		return -EFAULT;
+
+	if (copy_to_user(USERPTR(sp), tstack->data, tstack->size))
+		return -EFAULT;
+
+	if (tstack->flags & TRAMPFD_SET_SP)
+		user_stack_pointer_set(pt_regs, sp);
+
+	return 0;
+}
+
+/* ---------------------------- Fault Handlers ---------------------------- */
+
+static int trampfd_user_fault(struct trampfd *trampfd,
+			      struct vm_area_struct *vma,
+			      struct pt_regs *pt_regs)
+{
+	char			buf[TRAMPFD_MAX_STACK_SIZE];
+	struct trampfd_regs	*tregs;
+	struct trampfd_stack	*tstack = NULL;
+	unsigned long		addr;
+	size_t			size;
+	int			rc = 0;
+
+	mutex_lock(&trampfd->lock);
+
+	/*
+	 * Execution of the trampoline must start at the offset specified by
+	 * the kernel.
+	 */
+	addr = vma->vm_start + trampfd->map.ioffset;
+	if (addr != pt_regs->ip) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * At a minimum, the user PC register must be specified for a
+	 * user trampoline.
+	 */
+	tregs = trampfd->regs;
+	if (!tregs) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * Set the register context for the trampoline.
+	 */
+	set_regs(pt_regs, tregs);
+
+	if (trampfd->stack) {
+		/*
+		 * Copy the stack context into a local buffer and push stack
+		 * data after dropping the lock.
+		 */
+		size = sizeof(*trampfd->stack) + trampfd->stack->size;
+		tstack = (struct trampfd_stack *) buf;
+		memcpy(tstack, trampfd->stack, size);
+	}
+unlock:
+	mutex_unlock(&trampfd->lock);
+
+	if (!rc && tstack) {
+		mmap_read_unlock(vma->vm_mm);
+		rc = push_data(pt_regs, tstack);
+		mmap_read_lock(vma->vm_mm);
+	}
+	return rc;
+}
+
+/*
+ * Handle it if it is a trampoline fault.
+ */
+bool trampfd_fault(struct vm_area_struct *vma, struct pt_regs *pt_regs)
+{
+	struct trampfd		*trampfd;
+
+	if (!is_trampfd_vma(vma))
+		return false;
+	trampfd = vma->vm_private_data;
+
+	if (trampfd->type == TRAMPFD_USER)
+		return !trampfd_user_fault(trampfd, vma, pt_regs);
+	return false;
+}
+EXPORT_SYMBOL_GPL(trampfd_fault);
+
+/* ------------------------- Arch Initialization ------------------------- */
+
+int trampfd_check_arch(struct trampfd *trampfd)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(trampfd_check_arch);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1ead568c0101..a1432ee2a1a2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 #include <linux/efi.h>			/* efi_recover_from_page_fault()*/
 #include <linux/mm_types.h>
+#include <linux/trampfd.h>		/* trampoline invocation */
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -1142,6 +1143,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	struct mm_struct *mm;
 	vm_fault_t fault, major = 0;
 	unsigned int flags = FAULT_FLAG_DEFAULT;
+	unsigned long tflags = X86_PF_INSTR | X86_PF_USER;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1275,6 +1277,15 @@ void do_user_addr_fault(struct pt_regs *regs,
 	 */
 good_area:
 	if (unlikely(access_error(hw_error_code, vma))) {
+		/*
+		 * If it is a user execute fault, it could be a trampoline
+		 * invocation.
+		 */
+		if ((hw_error_code & tflags) == tflags &&
+		    trampfd_fault(vma, regs)) {
+			mmap_read_unlock(mm);
+			return;
+		}
 		bad_area_access_error(regs, hw_error_code, address, vma);
 		return;
 	}
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2020-07-28 13:11 UTC|newest]

Thread overview: 146+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <aefc85852ea518982e74b233e11e16d2e707bc32>
2020-07-28 13:10 ` [PATCH v1 0/4] [RFC] Implement Trampoline File Descriptor madvenka
2020-07-28 13:10   ` madvenka
2020-07-28 13:10   ` [PATCH v1 1/4] [RFC] fs/trampfd: Implement the trampoline file descriptor API madvenka
2020-07-28 13:10     ` madvenka
2020-07-28 14:50     ` Oleg Nesterov
2020-07-28 14:50       ` Oleg Nesterov
2020-07-28 14:58       ` Madhavan T. Venkataraman
2020-07-28 14:58         ` Madhavan T. Venkataraman
2020-07-28 16:06         ` Oleg Nesterov
2020-07-28 16:06           ` Oleg Nesterov
2020-07-28 19:48     ` kernel test robot
2020-07-29  2:33     ` kernel test robot
2020-07-28 13:10   ` madvenka [this message]
2020-07-28 13:10     ` [PATCH v1 2/4] [RFC] x86/trampfd: Provide support for the trampoline file descriptor madvenka
2020-07-28 18:38     ` kernel test robot
2020-07-30  9:06     ` Greg KH
2020-07-30  9:06       ` Greg KH
2020-07-30 14:25       ` Madhavan T. Venkataraman
2020-07-30 14:25         ` Madhavan T. Venkataraman
2020-07-28 13:10   ` [PATCH v1 3/4] [RFC] arm64/trampfd: " madvenka
2020-07-28 13:10     ` madvenka
2020-07-28 13:10   ` [PATCH v1 4/4] [RFC] arm/trampfd: " madvenka
2020-07-28 13:10     ` madvenka
2020-07-28 15:13   ` [PATCH v1 0/4] [RFC] Implement Trampoline File Descriptor David Laight
2020-07-28 15:13     ` David Laight
2020-07-28 15:13     ` David Laight
2020-07-28 16:32     ` Madhavan T. Venkataraman
2020-07-28 16:32       ` Madhavan T. Venkataraman
2020-07-28 16:32       ` Madhavan T. Venkataraman
2020-07-28 17:16       ` Andy Lutomirski
2020-07-28 17:16         ` Andy Lutomirski
2020-07-28 17:16         ` Andy Lutomirski
2020-07-28 17:39         ` Madhavan T. Venkataraman
2020-07-29  5:16           ` Andy Lutomirski
2020-07-29  5:16             ` Andy Lutomirski
2020-07-29  5:16             ` Andy Lutomirski
2020-07-28 18:52         ` Madhavan T. Venkataraman
2020-07-28 18:52           ` Madhavan T. Venkataraman
2020-07-28 18:52           ` Madhavan T. Venkataraman
2020-07-29  8:36           ` David Laight
2020-07-29  8:36             ` David Laight
2020-07-29  8:36             ` David Laight
2020-07-29 17:55             ` Madhavan T. Venkataraman
2020-07-29 17:55               ` Madhavan T. Venkataraman
2020-07-29 17:55               ` Madhavan T. Venkataraman
2020-07-28 16:05   ` Casey Schaufler
2020-07-28 16:05     ` Casey Schaufler
2020-07-28 16:49     ` Madhavan T. Venkataraman
2020-07-28 16:49       ` Madhavan T. Venkataraman
2020-07-28 17:05     ` James Morris
2020-07-28 17:05       ` James Morris
2020-07-28 17:08       ` Madhavan T. Venkataraman
2020-07-28 17:08         ` Madhavan T. Venkataraman
2020-07-28 17:31   ` Andy Lutomirski
2020-07-28 17:31     ` Andy Lutomirski
2020-07-28 17:31     ` Andy Lutomirski
2020-07-28 19:01     ` Madhavan T. Venkataraman
2020-07-28 19:01       ` Madhavan T. Venkataraman
2020-07-29 13:29     ` Florian Weimer
2020-07-29 13:29       ` Florian Weimer
2020-07-29 13:29       ` Florian Weimer
2020-07-30 13:09     ` David Laight
2020-07-30 13:09       ` David Laight
2020-08-02 11:56       ` Pavel Machek
2020-08-02 11:56         ` Pavel Machek
2020-08-03  8:08         ` David Laight
2020-08-03  8:08           ` David Laight
2020-08-03 15:57           ` Madhavan T. Venkataraman
2020-08-03 15:57             ` Madhavan T. Venkataraman
2020-07-30 14:24     ` Madhavan T. Venkataraman
2020-07-30 20:54       ` Andy Lutomirski
2020-07-30 20:54         ` Andy Lutomirski
2020-07-30 20:54         ` Andy Lutomirski
2020-07-31 17:13         ` Madhavan T. Venkataraman
2020-07-31 17:13           ` Madhavan T. Venkataraman
2020-07-31 18:31           ` Mark Rutland
2020-07-31 18:31             ` Mark Rutland
2020-08-03  8:27             ` David Laight
2020-08-03  8:27               ` David Laight
2020-08-03 16:03               ` Madhavan T. Venkataraman
2020-08-03 16:03                 ` Madhavan T. Venkataraman
2020-08-03 16:57                 ` David Laight
2020-08-03 16:57                   ` David Laight
2020-08-03 17:00                   ` Madhavan T. Venkataraman
2020-08-03 17:00                     ` Madhavan T. Venkataraman
2020-08-03 17:58             ` Madhavan T. Venkataraman
2020-08-03 17:58               ` Madhavan T. Venkataraman
2020-08-04 13:55               ` Mark Rutland
2020-08-04 13:55                 ` Mark Rutland
2020-08-04 14:33                 ` David Laight
2020-08-04 14:33                   ` David Laight
2020-08-04 14:44                   ` David Laight
2020-08-04 14:44                     ` David Laight
2020-08-04 14:48                   ` Madhavan T. Venkataraman
2020-08-04 14:48                     ` Madhavan T. Venkataraman
2020-08-04 15:46                 ` Madhavan T. Venkataraman
2020-08-04 15:46                   ` Madhavan T. Venkataraman
2020-08-02 13:57           ` Florian Weimer
2020-08-02 13:57             ` Florian Weimer
2020-08-02 13:57             ` Florian Weimer
2020-07-30 14:42     ` Madhavan T. Venkataraman
2020-07-30 14:42       ` Madhavan T. Venkataraman
2020-08-02 18:54     ` Madhavan T. Venkataraman
2020-08-02 18:54       ` Madhavan T. Venkataraman
2020-08-02 20:00       ` Andy Lutomirski
2020-08-02 20:00         ` Andy Lutomirski
2020-08-02 20:00         ` Andy Lutomirski
2020-08-02 22:58         ` Madhavan T. Venkataraman
2020-08-02 22:58           ` Madhavan T. Venkataraman
2020-08-03 18:36         ` Madhavan T. Venkataraman
2020-08-03 18:36           ` Madhavan T. Venkataraman
2020-08-10 17:20         ` Madhavan T. Venkataraman
2020-08-10 17:34         ` Madhavan T. Venkataraman
2020-08-10 17:34           ` Madhavan T. Venkataraman
2020-08-11 21:12           ` Madhavan T. Venkataraman
2020-08-11 21:12             ` Madhavan T. Venkataraman
2020-08-03  8:23       ` David Laight
2020-08-03  8:23         ` David Laight
2020-08-03 15:59         ` Madhavan T. Venkataraman
2020-08-03 15:59           ` Madhavan T. Venkataraman
2020-07-31 18:09   ` Mark Rutland
2020-07-31 18:09     ` Mark Rutland
2020-07-31 20:08     ` Madhavan T. Venkataraman
2020-07-31 20:08       ` Madhavan T. Venkataraman
2020-08-03 16:57     ` Madhavan T. Venkataraman
2020-08-03 16:57       ` Madhavan T. Venkataraman
2020-08-04 14:30       ` Mark Rutland
2020-08-04 14:30         ` Mark Rutland
2020-08-06 17:26         ` Madhavan T. Venkataraman
2020-08-06 17:26           ` Madhavan T. Venkataraman
2020-08-08 22:17           ` Pavel Machek
2020-08-08 22:17             ` Pavel Machek
2020-08-11 12:41             ` Madhavan T. Venkataraman
2020-08-11 12:41               ` Madhavan T. Venkataraman
2020-08-11 13:08               ` Pavel Machek
2020-08-11 13:08                 ` Pavel Machek
2020-08-11 15:54                 ` Madhavan T. Venkataraman
2020-08-11 15:54                   ` Madhavan T. Venkataraman
2020-08-12 10:06           ` Mark Rutland
2020-08-12 10:06             ` Mark Rutland
2020-08-12 18:47             ` Madhavan T. Venkataraman
2020-08-12 18:47               ` Madhavan T. Venkataraman
2020-08-19 18:53             ` Mickaël Salaün
2020-08-19 18:53               ` Mickaël Salaün
2020-09-01 15:42               ` Mark Rutland
2020-09-01 15:42                 ` Mark Rutland

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200728131050.24443-3-madvenka@linux.microsoft.com \
    --to=madvenka@linux.microsoft.com \
    --cc=kernel-hardening@lists.openwall.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-integrity@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.