linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* properly support exec and wait with kernel pointers
@ 2020-06-15 13:00 Christoph Hellwig
  2020-06-15 13:00 ` [PATCH 1/6] exec: cleanup the execve wrappers Christoph Hellwig
                   ` (6 more replies)
  0 siblings, 7 replies; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 13:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Arnd Bergmann, Luis Chamberlain, linux-arm-kernel, x86,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-fsdevel, linux-arch, linux-kernel

Hi all,

this series first cleans up the exec code and then adds proper
kernel_execveat and kernel_wait callers instead of relying on the fact
that the early init code and kernel threads implicitly run with
the address limit set to KERNEL_DS.

Note that the cleanup removes the compat execve(at) handlers (almost)
entirely, as we can handle the compat difference very nicely in a
unified codebase.  The only exception is x86 where this would list the
handlers twice in the same syscall table due to the messed up x32
design.  I had to add an extra compat handler just for that case, but
maybe someone has a better idea.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/6] exec: cleanup the execve wrappers
  2020-06-15 13:00 properly support exec and wait with kernel pointers Christoph Hellwig
@ 2020-06-15 13:00 ` Christoph Hellwig
  2020-06-15 13:00 ` [PATCH 2/6] exec: simplify the compat syscall handling Christoph Hellwig
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 13:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Arnd Bergmann, Luis Chamberlain, linux-arm-kernel, x86,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-fsdevel, linux-arch, linux-kernel

Remove a whole bunch of wrappers that eventually all call
__do_execve_file, and consolidate the execvce helpers to:

  (1) __do_execveat, which is the lowest level helper implementing the
      actual functionality
  (2) do_execvat, which is used by all callers that want native
      pointers
  (3) do_compat_execve, which is used by all compat syscalls

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c               | 98 +++++++++++------------------------------
 include/linux/binfmts.h | 12 ++---
 init/main.c             |  7 +--
 kernel/umh.c            | 16 +++----
 4 files changed, 41 insertions(+), 92 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a7032784..354fdaa536ae7d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1815,10 +1815,7 @@ static int exec_binprm(struct linux_binprm *bprm)
 	return 0;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-static int __do_execve_file(int fd, struct filename *filename,
+static int __do_execveat(int fd, struct filename *filename,
 			    struct user_arg_ptr argv,
 			    struct user_arg_ptr envp,
 			    int flags, struct file *file)
@@ -1972,74 +1969,16 @@ static int __do_execve_file(int fd, struct filename *filename,
 	return retval;
 }
 
-static int do_execveat_common(int fd, struct filename *filename,
-			      struct user_arg_ptr argv,
-			      struct user_arg_ptr envp,
-			      int flags)
-{
-	return __do_execve_file(fd, filename, argv, envp, flags, NULL);
-}
-
-int do_execve_file(struct file *file, void *__argv, void *__envp)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-
-	return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
-}
-
-int do_execve(struct filename *filename,
-	const char __user *const __user *__argv,
-	const char __user *const __user *__envp)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
-}
-
 int do_execveat(int fd, struct filename *filename,
 		const char __user *const __user *__argv,
 		const char __user *const __user *__envp,
-		int flags)
+		int flags, struct file *file)
 {
 	struct user_arg_ptr argv = { .ptr.native = __argv };
 	struct user_arg_ptr envp = { .ptr.native = __envp };
 
-	return do_execveat_common(fd, filename, argv, envp, flags);
-}
-
-#ifdef CONFIG_COMPAT
-static int compat_do_execve(struct filename *filename,
-	const compat_uptr_t __user *__argv,
-	const compat_uptr_t __user *__envp)
-{
-	struct user_arg_ptr argv = {
-		.is_compat = true,
-		.ptr.compat = __argv,
-	};
-	struct user_arg_ptr envp = {
-		.is_compat = true,
-		.ptr.compat = __envp,
-	};
-	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
-}
-
-static int compat_do_execveat(int fd, struct filename *filename,
-			      const compat_uptr_t __user *__argv,
-			      const compat_uptr_t __user *__envp,
-			      int flags)
-{
-	struct user_arg_ptr argv = {
-		.is_compat = true,
-		.ptr.compat = __argv,
-	};
-	struct user_arg_ptr envp = {
-		.is_compat = true,
-		.ptr.compat = __envp,
-	};
-	return do_execveat_common(fd, filename, argv, envp, flags);
+	return __do_execveat(fd, filename, argv, envp, flags, file);
 }
-#endif
 
 void set_binfmt(struct linux_binfmt *new)
 {
@@ -2070,7 +2009,7 @@ SYSCALL_DEFINE3(execve,
 		const char __user *const __user *, argv,
 		const char __user *const __user *, envp)
 {
-	return do_execve(getname(filename), argv, envp);
+	return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
 }
 
 SYSCALL_DEFINE5(execveat,
@@ -2080,18 +2019,34 @@ SYSCALL_DEFINE5(execveat,
 		int, flags)
 {
 	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+	struct filename *name = getname_flags(filename, lookup_flags, NULL);
 
-	return do_execveat(fd,
-			   getname_flags(filename, lookup_flags, NULL),
-			   argv, envp, flags);
+	return do_execveat(fd, name, argv, envp, flags, NULL);
 }
 
 #ifdef CONFIG_COMPAT
+static int do_compat_execve(int fd, struct filename *filename,
+		const compat_uptr_t __user *__argv,
+		const compat_uptr_t __user *__envp,
+		int flags)
+{
+	struct user_arg_ptr argv = {
+		.is_compat = true,
+		.ptr.compat = __argv,
+	};
+	struct user_arg_ptr envp = {
+		.is_compat = true,
+		.ptr.compat = __envp,
+	};
+
+	return __do_execveat(fd, filename, argv, envp, flags, NULL);
+}
+
 COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
 	const compat_uptr_t __user *, argv,
 	const compat_uptr_t __user *, envp)
 {
-	return compat_do_execve(getname(filename), argv, envp);
+	return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 0);
 }
 
 COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
@@ -2101,9 +2056,8 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
 		       int,  flags)
 {
 	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+	struct filename *name = getname_flags(filename, lookup_flags, NULL);
 
-	return compat_do_execveat(fd,
-				  getname_flags(filename, lookup_flags, NULL),
-				  argv, envp, flags);
+	return do_compat_execve(fd, name, argv, envp, flags);
 }
 #endif
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 4a20b7517dd036..bed702e4b1fbd9 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -134,13 +134,9 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
 
-extern int do_execve(struct filename *,
-		     const char __user * const __user *,
-		     const char __user * const __user *);
-extern int do_execveat(int, struct filename *,
-		       const char __user * const __user *,
-		       const char __user * const __user *,
-		       int);
-int do_execve_file(struct file *file, void *__argv, void *__envp);
+int do_execveat(int fd, struct filename *filename,
+		const char __user *const __user *__argv,
+		const char __user *const __user *__envp,
+		int flags, struct file *file);
 
 #endif /* _LINUX_BINFMTS_H */
diff --git a/init/main.c b/init/main.c
index 0ead83e86b5aa2..838950ea7bca22 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1329,9 +1329,10 @@ static int run_init_process(const char *init_filename)
 	pr_debug("  with environment:\n");
 	for (p = envp_init; *p; p++)
 		pr_debug("    %s\n", *p);
-	return do_execve(getname_kernel(init_filename),
-		(const char __user *const __user *)argv_init,
-		(const char __user *const __user *)envp_init);
+	return do_execveat(AT_FDCWD, getname_kernel(init_filename),
+			(const char __user *const __user *)argv_init,
+			(const char __user *const __user *)envp_init,
+			0, NULL);
 }
 
 static int try_to_run_init_process(const char *init_filename)
diff --git a/kernel/umh.c b/kernel/umh.c
index 79f139a7ca03c6..7aa9a5817582ca 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -103,15 +103,13 @@ static int call_usermodehelper_exec_async(void *data)
 	commit_creds(new);
 
 	sub_info->pid = task_pid_nr(current);
-	if (sub_info->file) {
-		retval = do_execve_file(sub_info->file,
-					sub_info->argv, sub_info->envp);
-		if (!retval)
-			current->flags |= PF_UMH;
-	} else
-		retval = do_execve(getname_kernel(sub_info->path),
-				   (const char __user *const __user *)sub_info->argv,
-				   (const char __user *const __user *)sub_info->envp);
+	retval = do_execveat(AT_FDCWD,
+			sub_info->path ? getname_kernel(sub_info->path) : NULL,
+			(const char __user *const __user *)sub_info->argv,
+			(const char __user *const __user *)sub_info->envp,
+			0, sub_info->file);
+	if (sub_info->file && !retval)
+		current->flags |= PF_UMH;
 out:
 	sub_info->retval = retval;
 	/*
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 13:00 properly support exec and wait with kernel pointers Christoph Hellwig
  2020-06-15 13:00 ` [PATCH 1/6] exec: cleanup the execve wrappers Christoph Hellwig
@ 2020-06-15 13:00 ` Christoph Hellwig
  2020-06-15 13:31   ` Arnd Bergmann
  2020-06-15 13:00 ` [PATCH 3/6] exec: cleanup the count() function Christoph Hellwig
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 13:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Arnd Bergmann, Luis Chamberlain, linux-arm-kernel, x86,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-fsdevel, linux-arch, linux-kernel

The only differenence betweeen the compat exec* syscalls and their
native versions is that compat_ptr sign extension, and the fact that
the pointer arithmetics for the two dimensional arrays needs to use
the compat pointer size.  Instead of the compat wrappers and the
struct user_arg_ptr machinery just use in_compat_syscall() to do the
right thing for the compat case deep inside get_user_arg_ptr().

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/include/asm/unistd32.h             |  4 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl     |  4 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl     |  4 +-
 arch/parisc/kernel/syscalls/syscall.tbl       |  4 +-
 arch/powerpc/kernel/syscalls/syscall.tbl      |  4 +-
 arch/s390/kernel/syscalls/syscall.tbl         |  4 +-
 arch/sparc/kernel/syscalls.S                  |  4 +-
 arch/x86/entry/syscalls/syscall_32.tbl        |  4 +-
 fs/exec.c                                     | 89 ++++++-------------
 include/linux/compat.h                        |  7 --
 include/uapi/asm-generic/unistd.h             |  4 +-
 tools/include/uapi/asm-generic/unistd.h       |  4 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl   |  4 +-
 .../perf/arch/s390/entry/syscalls/syscall.tbl |  4 +-
 14 files changed, 53 insertions(+), 91 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 6d95d0c8bf2f47..141f5d2ff1c34f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -33,7 +33,7 @@ __SYSCALL(__NR_link, sys_link)
 #define __NR_unlink 10
 __SYSCALL(__NR_unlink, sys_unlink)
 #define __NR_execve 11
-__SYSCALL(__NR_execve, compat_sys_execve)
+__SYSCALL(__NR_execve, sys_execve)
 #define __NR_chdir 12
 __SYSCALL(__NR_chdir, sys_chdir)
 			/* 13 was sys_time */
@@ -785,7 +785,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 386
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 387
-__SYSCALL(__NR_execveat, compat_sys_execveat)
+__SYSCALL(__NR_execveat, sys_execveat)
 #define __NR_userfaultfd 388
 __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 #define __NR_membarrier 389
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f777141f52568f..e861b5ab7179c9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -64,7 +64,7 @@
 54	n32	getsockopt			compat_sys_getsockopt
 55	n32	clone				__sys_clone
 56	n32	fork				__sys_fork
-57	n32	execve				compat_sys_execve
+57	n32	execve				sys_execve
 58	n32	exit				sys_exit
 59	n32	wait4				compat_sys_wait4
 60	n32	kill				sys_kill
@@ -328,7 +328,7 @@
 317	n32	getrandom			sys_getrandom
 318	n32	memfd_create			sys_memfd_create
 319	n32	bpf				sys_bpf
-320	n32	execveat			compat_sys_execveat
+320	n32	execveat			sys_execveat
 321	n32	userfaultfd			sys_userfaultfd
 322	n32	membarrier			sys_membarrier
 323	n32	mlock2				sys_mlock2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 13280625d312e9..bba80f74e9968e 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -18,7 +18,7 @@
 8	o32	creat				sys_creat
 9	o32	link				sys_link
 10	o32	unlink				sys_unlink
-11	o32	execve				sys_execve			compat_sys_execve
+11	o32	execve				sys_execve
 12	o32	chdir				sys_chdir
 13	o32	time				sys_time32
 14	o32	mknod				sys_mknod
@@ -367,7 +367,7 @@
 353	o32	getrandom			sys_getrandom
 354	o32	memfd_create			sys_memfd_create
 355	o32	bpf				sys_bpf
-356	o32	execveat			sys_execveat			compat_sys_execveat
+356	o32	execveat			sys_execveat
 357	o32	userfaultfd			sys_userfaultfd
 358	o32	membarrier			sys_membarrier
 359	o32	mlock2				sys_mlock2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 5a758fa6ec5242..23fa0d0edf3384 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -18,7 +18,7 @@
 8	common	creat			sys_creat
 9	common	link			sys_link
 10	common	unlink			sys_unlink
-11	common	execve			sys_execve			compat_sys_execve
+11	common	execve			sys_execve
 12	common	chdir			sys_chdir
 13	32	time			sys_time32
 13	64	time			sys_time
@@ -385,7 +385,7 @@
 339	common	getrandom		sys_getrandom
 340	common	memfd_create		sys_memfd_create
 341	common	bpf			sys_bpf
-342	common	execveat		sys_execveat			compat_sys_execveat
+342	common	execveat		sys_execveat
 343	common	membarrier		sys_membarrier
 344	common	userfaultfd		sys_userfaultfd
 345	common	mlock2			sys_mlock2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index f833a319082247..c52cdab89dc0ae 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -20,7 +20,7 @@
 8	common	creat				sys_creat
 9	common	link				sys_link
 10	common	unlink				sys_unlink
-11	nospu	execve				sys_execve			compat_sys_execve
+11	nospu	execve				sys_execve
 12	common	chdir				sys_chdir
 13	32	time				sys_time32
 13	64	time				sys_time
@@ -460,7 +460,7 @@
 359	common	getrandom			sys_getrandom
 360	common	memfd_create			sys_memfd_create
 361	common	bpf				sys_bpf
-362	nospu	execveat			sys_execveat			compat_sys_execveat
+362	nospu	execveat			sys_execveat
 363	32	switch_endian			sys_ni_syscall
 363	64	switch_endian			sys_switch_endian
 363	spu	switch_endian			sys_ni_syscall
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bfdcb763395735..bd2275db2026ea 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -18,7 +18,7 @@
 8    common	creat			sys_creat			sys_creat
 9    common	link			sys_link			sys_link
 10   common	unlink			sys_unlink			sys_unlink
-11   common	execve			sys_execve			compat_sys_execve
+11   common	execve			sys_execve			sys_execve
 12   common	chdir			sys_chdir			sys_chdir
 13   32		time			-				sys_time32
 14   common	mknod			sys_mknod			sys_mknod
@@ -361,7 +361,7 @@
 351  common	bpf			sys_bpf				sys_bpf
 352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		sys_s390_pci_mmio_write
 353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		sys_s390_pci_mmio_read
-354  common	execveat		sys_execveat			compat_sys_execveat
+354  common	execveat		sys_execveat			sys_execveat
 355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
 356  common	membarrier		sys_membarrier			sys_membarrier
 357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index db42b4fb370844..70463972152a92 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -16,12 +16,12 @@ sys64_execveat:
 sunos_execv:
 	mov	%g0, %o2
 sys32_execve:
-	set	compat_sys_execve, %g1
+	set	sys_execve, %g1
 	jmpl	%g1, %g0
 	 flushw
 
 sys32_execveat:
-	set	compat_sys_execveat, %g1
+	set	sys_execveat, %g1
 	jmpl	%g1, %g0
 	 flushw
 #endif
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d8f8a1a69ed11f..2b1eccd3f8f697 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -22,7 +22,7 @@
 8	i386	creat			sys_creat
 9	i386	link			sys_link
 10	i386	unlink			sys_unlink
-11	i386	execve			sys_execve			compat_sys_execve
+11	i386	execve			sys_execve
 12	i386	chdir			sys_chdir
 13	i386	time			sys_time32
 14	i386	mknod			sys_mknod
@@ -369,7 +369,7 @@
 355	i386	getrandom		sys_getrandom
 356	i386	memfd_create		sys_memfd_create
 357	i386	bpf			sys_bpf
-358	i386	execveat		sys_execveat			compat_sys_execveat
+358	i386	execveat		sys_execveat
 359	i386	socket			sys_socket
 360	i386	socketpair		sys_socketpair
 361	i386	bind			sys_bind
diff --git a/fs/exec.c b/fs/exec.c
index 354fdaa536ae7d..94107eceda8a67 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -386,34 +386,25 @@ static int bprm_mm_init(struct linux_binprm *bprm)
 	return err;
 }
 
-struct user_arg_ptr {
-#ifdef CONFIG_COMPAT
-	bool is_compat;
-#endif
-	union {
-		const char __user *const __user *native;
-#ifdef CONFIG_COMPAT
-		const compat_uptr_t __user *compat;
-#endif
-	} ptr;
-};
-
-static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+static const char __user *
+get_user_arg_ptr(const char __user *const __user *argv, int nr)
 {
 	const char __user *native;
 
 #ifdef CONFIG_COMPAT
-	if (unlikely(argv.is_compat)) {
+	if (in_compat_syscall()) {
+		const compat_uptr_t __user *compat_argv =
+			compat_ptr((unsigned long)argv);
 		compat_uptr_t compat;
 
-		if (get_user(compat, argv.ptr.compat + nr))
+		if (get_user(compat, compat_argv + nr))
 			return ERR_PTR(-EFAULT);
 
 		return compat_ptr(compat);
 	}
 #endif
 
-	if (get_user(native, argv.ptr.native + nr))
+	if (get_user(native, argv + nr))
 		return ERR_PTR(-EFAULT);
 
 	return native;
@@ -422,11 +413,11 @@ static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
 /*
  * count() counts the number of strings in array ARGV.
  */
-static int count(struct user_arg_ptr argv, int max)
+static int count(const char __user *const __user *argv, int max)
 {
 	int i = 0;
 
-	if (argv.ptr.native != NULL) {
+	if (argv) {
 		for (;;) {
 			const char __user *p = get_user_arg_ptr(argv, i);
 
@@ -449,7 +440,8 @@ static int count(struct user_arg_ptr argv, int max)
 }
 
 static int prepare_arg_pages(struct linux_binprm *bprm,
-			struct user_arg_ptr argv, struct user_arg_ptr envp)
+		const char __user *const __user *argv,
+		const char __user *const __user *envp)
 {
 	unsigned long limit, ptr_size;
 
@@ -497,7 +489,7 @@ static int prepare_arg_pages(struct linux_binprm *bprm,
  * processes's memory to the new process's stack.  The call to get_user_pages()
  * ensures the destination page is created and not swapped out.
  */
-static int copy_strings(int argc, struct user_arg_ptr argv,
+static int copy_strings(int argc, const char __user *const __user *argv,
 			struct linux_binprm *bprm)
 {
 	struct page *kmapped_page = NULL;
@@ -1815,10 +1807,10 @@ static int exec_binprm(struct linux_binprm *bprm)
 	return 0;
 }
 
-static int __do_execveat(int fd, struct filename *filename,
-			    struct user_arg_ptr argv,
-			    struct user_arg_ptr envp,
-			    int flags, struct file *file)
+int do_execveat(int fd, struct filename *filename,
+		const char __user *const __user *argv,
+		const char __user *const __user *envp,
+		int flags, struct file *file)
 {
 	char *pathbuf = NULL;
 	struct linux_binprm *bprm;
@@ -1969,17 +1961,6 @@ static int __do_execveat(int fd, struct filename *filename,
 	return retval;
 }
 
-int do_execveat(int fd, struct filename *filename,
-		const char __user *const __user *__argv,
-		const char __user *const __user *__envp,
-		int flags, struct file *file)
-{
-	struct user_arg_ptr argv = { .ptr.native = __argv };
-	struct user_arg_ptr envp = { .ptr.native = __envp };
-
-	return __do_execveat(fd, filename, argv, envp, flags, file);
-}
-
 void set_binfmt(struct linux_binfmt *new)
 {
 	struct mm_struct *mm = current->mm;
@@ -2024,40 +2005,28 @@ SYSCALL_DEFINE5(execveat,
 	return do_execveat(fd, name, argv, envp, flags, NULL);
 }
 
-#ifdef CONFIG_COMPAT
-static int do_compat_execve(int fd, struct filename *filename,
-		const compat_uptr_t __user *__argv,
-		const compat_uptr_t __user *__envp,
-		int flags)
-{
-	struct user_arg_ptr argv = {
-		.is_compat = true,
-		.ptr.compat = __argv,
-	};
-	struct user_arg_ptr envp = {
-		.is_compat = true,
-		.ptr.compat = __envp,
-	};
-
-	return __do_execveat(fd, filename, argv, envp, flags, NULL);
-}
-
+/*
+ * x32 syscalls are listed in the same table as x86_64 ones, so we need to
+ * define compat syscalls that are exactly the same as the native version for
+ * the syscall table machinery to work.  Sigh..
+ */
+#ifdef CONFIG_X86_X32
 COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
-	const compat_uptr_t __user *, argv,
-	const compat_uptr_t __user *, envp)
+		       const char __user *const __user *, argv,
+		       const char __user *const __user *, envp)
 {
-	return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 0);
+	return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
 }
 
 COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
 		       const char __user *, filename,
-		       const compat_uptr_t __user *, argv,
-		       const compat_uptr_t __user *, envp,
+		       const char __user *const __user *, argv,
+		       const char __user *const __user *, envp,
 		       int,  flags)
 {
 	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
 	struct filename *name = getname_flags(filename, lookup_flags, NULL);
 
-	return do_compat_execve(fd, name, argv, envp, flags);
+	return do_execveat(fd, name, argv, envp, flags, NULL);
 }
-#endif
+#endif /* CONFIG_X86_X32 */
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e90100c0de72e4..5e8f6a588e0d43 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -752,10 +752,6 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg,
 asmlinkage long compat_sys_keyctl(u32 option,
 			      u32 arg2, u32 arg3, u32 arg4, u32 arg5);
 
-/* arch/example/kernel/sys_example.c */
-asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
-		     const compat_uptr_t __user *envp);
-
 /* mm/fadvise.c: No generic prototype for fadvise64_64 */
 
 /* mm/, CONFIG_MMU only */
@@ -806,9 +802,6 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 		const struct compat_iovec __user *lvec,
 		compat_ulong_t liovcnt, const struct compat_iovec __user *rvec,
 		compat_ulong_t riovcnt, compat_ulong_t flags);
-asmlinkage long compat_sys_execveat(int dfd, const char __user *filename,
-		     const compat_uptr_t __user *argv,
-		     const compat_uptr_t __user *envp, int flags);
 asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd,
 		const struct compat_iovec __user *vec,
 		compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f4a01305d9a65c..c639d04a094b8b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -640,7 +640,7 @@ __SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
 #define __NR_clone 220
 __SYSCALL(__NR_clone, sys_clone)
 #define __NR_execve 221
-__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
+__SYSCALL(__NR_execve, sys_execve)
 
 #define __NR3264_mmap 222
 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
@@ -751,7 +751,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 280
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
-__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+__SYSCALL(__NR_execveat, sys_execveat)
 #define __NR_userfaultfd 282
 __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 #define __NR_membarrier 283
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 3a3201e4618ef8..a6dbc8af8bd577 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -640,7 +640,7 @@ __SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
 #define __NR_clone 220
 __SYSCALL(__NR_clone, sys_clone)
 #define __NR_execve 221
-__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
+__SYSCALL(__NR_execve, sys_execve)
 
 #define __NR3264_mmap 222
 __SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
@@ -751,7 +751,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 280
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
-__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+__SYSCALL(__NR_execveat, sys_execveat)
 #define __NR_userfaultfd 282
 __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 #define __NR_membarrier 283
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 35b61bfc1b1ae9..42bf8b461a0ed6 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -18,7 +18,7 @@
 8	common	creat				sys_creat
 9	common	link				sys_link
 10	common	unlink				sys_unlink
-11	nospu	execve				sys_execve			compat_sys_execve
+11	nospu	execve				sys_execve			sys_execve
 12	common	chdir				sys_chdir
 13	32	time				sys_time32
 13	64	time				sys_time
@@ -454,7 +454,7 @@
 359	common	getrandom			sys_getrandom
 360	common	memfd_create			sys_memfd_create
 361	common	bpf				sys_bpf
-362	nospu	execveat			sys_execveat			compat_sys_execveat
+362	nospu	execveat			sys_execveat			sys_execveat
 363	32	switch_endian			sys_ni_syscall
 363	64	switch_endian			ppc_switch_endian
 363	spu	switch_endian			sys_ni_syscall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b38d48464368dc..f3c16f2d9746ac 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -18,7 +18,7 @@
 8    common	creat			sys_creat			compat_sys_creat
 9    common	link			sys_link			compat_sys_link
 10   common	unlink			sys_unlink			compat_sys_unlink
-11   common	execve			sys_execve			compat_sys_execve
+11   common	execve			sys_execve			sys_execve
 12   common	chdir			sys_chdir			compat_sys_chdir
 13   32		time			-				compat_sys_time
 14   common	mknod			sys_mknod			compat_sys_mknod
@@ -361,7 +361,7 @@
 351  common	bpf			sys_bpf				compat_sys_bpf
 352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		compat_sys_s390_pci_mmio_write
 353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		compat_sys_s390_pci_mmio_read
-354  common	execveat		sys_execveat			compat_sys_execveat
+354  common	execveat		sys_execveat			sys_execveat
 355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
 356  common	membarrier		sys_membarrier			sys_membarrier
 357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/6] exec: cleanup the count() function
  2020-06-15 13:00 properly support exec and wait with kernel pointers Christoph Hellwig
  2020-06-15 13:00 ` [PATCH 1/6] exec: cleanup the execve wrappers Christoph Hellwig
  2020-06-15 13:00 ` [PATCH 2/6] exec: simplify the compat syscall handling Christoph Hellwig
@ 2020-06-15 13:00 ` Christoph Hellwig
  2020-06-15 13:00 ` [PATCH 4/6] exec: split prepare_arg_pages Christoph Hellwig
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 13:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Arnd Bergmann, Luis Chamberlain, linux-arm-kernel, x86,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-fsdevel, linux-arch, linux-kernel

Remove the max argument as it is hard wired to MAX_ARG_STRINGS, and
give the function a slightly less generic name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 94107eceda8a67..6e4d9d1ffa35fa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -411,9 +411,9 @@ get_user_arg_ptr(const char __user *const __user *argv, int nr)
 }
 
 /*
- * count() counts the number of strings in array ARGV.
+ * count_strings() counts the number of strings in array ARGV.
  */
-static int count(const char __user *const __user *argv, int max)
+static int count_strings(const char __user *const __user *argv)
 {
 	int i = 0;
 
@@ -427,7 +427,7 @@ static int count(const char __user *const __user *argv, int max)
 			if (IS_ERR(p))
 				return -EFAULT;
 
-			if (i >= max)
+			if (i >= MAX_ARG_STRINGS)
 				return -E2BIG;
 			++i;
 
@@ -445,11 +445,11 @@ static int prepare_arg_pages(struct linux_binprm *bprm,
 {
 	unsigned long limit, ptr_size;
 
-	bprm->argc = count(argv, MAX_ARG_STRINGS);
+	bprm->argc = count_strings(argv);
 	if (bprm->argc < 0)
 		return bprm->argc;
 
-	bprm->envc = count(envp, MAX_ARG_STRINGS);
+	bprm->envc = count_strings(envp);
 	if (bprm->envc < 0)
 		return bprm->envc;
 
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/6] exec: split prepare_arg_pages
  2020-06-15 13:00 properly support exec and wait with kernel pointers Christoph Hellwig
                   ` (2 preceding siblings ...)
  2020-06-15 13:00 ` [PATCH 3/6] exec: cleanup the count() function Christoph Hellwig
@ 2020-06-15 13:00 ` Christoph Hellwig
  2020-06-15 13:00 ` [PATCH 5/6] exec: add a kernel_execveat helper Christoph Hellwig
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 13:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Arnd Bergmann, Luis Chamberlain, linux-arm-kernel, x86,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-fsdevel, linux-arch, linux-kernel

Move counting the arguments and enviroment variables out of
prepare_arg_pages and rename the rest of the function to check_arg_limit.
This prepares for a version of do_execvat that takes kernel pointers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 6e4d9d1ffa35fa..696b1e8180d7d8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -439,20 +439,10 @@ static int count_strings(const char __user *const __user *argv)
 	return i;
 }
 
-static int prepare_arg_pages(struct linux_binprm *bprm,
-		const char __user *const __user *argv,
-		const char __user *const __user *envp)
+static int check_arg_limit(struct linux_binprm *bprm)
 {
 	unsigned long limit, ptr_size;
 
-	bprm->argc = count_strings(argv);
-	if (bprm->argc < 0)
-		return bprm->argc;
-
-	bprm->envc = count_strings(envp);
-	if (bprm->envc < 0)
-		return bprm->envc;
-
 	/*
 	 * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
 	 * (whichever is smaller) for the argv+env strings.
@@ -1890,7 +1880,19 @@ int do_execveat(int fd, struct filename *filename,
 	if (retval)
 		goto out_unmark;
 
-	retval = prepare_arg_pages(bprm, argv, envp);
+	bprm->argc = count_strings(argv);
+	if (bprm->argc < 0) {
+		retval = bprm->argc;
+		goto out;
+	}
+
+	bprm->envc = count_strings(envp);
+	if (bprm->envc < 0) {
+		retval = bprm->envc;
+		goto out;
+	}
+
+	retval = check_arg_limit(bprm);
 	if (retval < 0)
 		goto out;
 
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 5/6] exec: add a kernel_execveat helper
  2020-06-15 13:00 properly support exec and wait with kernel pointers Christoph Hellwig
                   ` (3 preceding siblings ...)
  2020-06-15 13:00 ` [PATCH 4/6] exec: split prepare_arg_pages Christoph Hellwig
@ 2020-06-15 13:00 ` Christoph Hellwig
  2020-06-15 13:00 ` [PATCH 6/6] kernel: add a kernel_wait helper Christoph Hellwig
  2020-06-15 13:42 ` properly support exec and wait with kernel pointers Arnd Bergmann
  6 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 13:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Arnd Bergmann, Luis Chamberlain, linux-arm-kernel, x86,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-fsdevel, linux-arch, linux-kernel

Add a kernel_execveat helper to execute a binary with kernel space argv
and envp pointers.  Switch executing init and user mode helpers to this
new helper instead of relying on the implicit set_fs(KERNEL_DS) for early
init code and kernel threads, and move the getname call into the
do_execve helper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c               | 116 +++++++++++++++++++++++++++++++---------
 include/linux/binfmts.h |   6 +--
 init/main.c             |   6 +--
 kernel/umh.c            |   8 ++-
 4 files changed, 97 insertions(+), 39 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 696b1e8180d7d8..9e16d56aa53e86 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -439,6 +439,21 @@ static int count_strings(const char __user *const __user *argv)
 	return i;
 }
 
+static int count_kernel_strings(const char *const *argv)
+{
+	int i;
+
+	if (!argv)
+		return 0;
+
+	for (i = 0; argv[i]; i++) {
+		if (i >= MAX_ARG_STRINGS)
+			return -E2BIG;
+	}
+
+	return i;
+}
+
 static int check_arg_limit(struct linux_binprm *bprm)
 {
 	unsigned long limit, ptr_size;
@@ -615,6 +630,19 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
 }
 EXPORT_SYMBOL(copy_string_kernel);
 
+static int copy_strings_kernel(int argc, const char *const *argv,
+		struct linux_binprm *bprm)
+{
+	int ret;
+
+	while (argc-- > 0) {
+		ret = copy_string_kernel(argv[argc], bprm);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
 #ifdef CONFIG_MMU
 
 /*
@@ -1797,9 +1825,11 @@ static int exec_binprm(struct linux_binprm *bprm)
 	return 0;
 }
 
-int do_execveat(int fd, struct filename *filename,
+static int __do_execveat(int fd, struct filename *filename,
 		const char __user *const __user *argv,
 		const char __user *const __user *envp,
+		const char *const *kernel_argv,
+		const char *const *kernel_envp,
 		int flags, struct file *file)
 {
 	char *pathbuf = NULL;
@@ -1880,16 +1910,30 @@ int do_execveat(int fd, struct filename *filename,
 	if (retval)
 		goto out_unmark;
 
-	bprm->argc = count_strings(argv);
-	if (bprm->argc < 0) {
-		retval = bprm->argc;
-		goto out;
-	}
+	if (unlikely(kernel_argv)) {
+		bprm->argc = count_kernel_strings(kernel_argv);
+		if (bprm->argc < 0) {
+			retval = bprm->argc;
+			goto out;
+		}
 
-	bprm->envc = count_strings(envp);
-	if (bprm->envc < 0) {
-		retval = bprm->envc;
-		goto out;
+		bprm->envc = count_kernel_strings(kernel_envp);
+		if (bprm->envc < 0) {
+			retval = bprm->envc;
+			goto out;
+		}
+	} else {
+		bprm->argc = count_strings(argv);
+		if (bprm->argc < 0) {
+			retval = bprm->argc;
+			goto out;
+		}
+
+		bprm->envc = count_strings(envp);
+		if (bprm->envc < 0) {
+			retval = bprm->envc;
+			goto out;
+		}
 	}
 
 	retval = check_arg_limit(bprm);
@@ -1906,13 +1950,22 @@ int do_execveat(int fd, struct filename *filename,
 		goto out;
 
 	bprm->exec = bprm->p;
-	retval = copy_strings(bprm->envc, envp, bprm);
-	if (retval < 0)
-		goto out;
 
-	retval = copy_strings(bprm->argc, argv, bprm);
-	if (retval < 0)
-		goto out;
+	if (unlikely(kernel_argv)) {
+		retval = copy_strings_kernel(bprm->envc, kernel_envp, bprm);
+		if (retval < 0)
+			goto out;
+		retval = copy_strings_kernel(bprm->argc, kernel_argv, bprm);
+		if (retval < 0)
+			goto out;
+	} else {
+		retval = copy_strings(bprm->envc, envp, bprm);
+		if (retval < 0)
+			goto out;
+		retval = copy_strings(bprm->argc, argv, bprm);
+		if (retval < 0)
+			goto out;
+	}
 
 	retval = exec_binprm(bprm);
 	if (retval < 0)
@@ -1963,6 +2016,23 @@ int do_execveat(int fd, struct filename *filename,
 	return retval;
 }
 
+static int do_execveat(int fd, const char *filename,
+		       const char __user *const __user *argv,
+		       const char __user *const __user *envp, int flags)
+{
+	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+	struct filename *name = getname_flags(filename, lookup_flags, NULL);
+
+	return __do_execveat(fd, name, argv, envp, NULL, NULL, flags, NULL);
+}
+
+int kernel_execveat(int fd, const char *filename, const char *const *argv,
+		const char *const *envp, int flags, struct file *file)
+{
+	return __do_execveat(fd, getname_kernel(filename), NULL, NULL, argv,
+			     envp, flags, file);
+}
+
 void set_binfmt(struct linux_binfmt *new)
 {
 	struct mm_struct *mm = current->mm;
@@ -1992,7 +2062,7 @@ SYSCALL_DEFINE3(execve,
 		const char __user *const __user *, argv,
 		const char __user *const __user *, envp)
 {
-	return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
+	return do_execveat(AT_FDCWD, filename, argv, envp, 0);
 }
 
 SYSCALL_DEFINE5(execveat,
@@ -2001,10 +2071,7 @@ SYSCALL_DEFINE5(execveat,
 		const char __user *const __user *, envp,
 		int, flags)
 {
-	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
-	struct filename *name = getname_flags(filename, lookup_flags, NULL);
-
-	return do_execveat(fd, name, argv, envp, flags, NULL);
+	return do_execveat(fd, filename, argv, envp, flags);
 }
 
 /*
@@ -2017,7 +2084,7 @@ COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
 		       const char __user *const __user *, argv,
 		       const char __user *const __user *, envp)
 {
-	return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
+	return do_execveat(AT_FDCWD, filename, argv, envp, 0);
 }
 
 COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
@@ -2026,9 +2093,6 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
 		       const char __user *const __user *, envp,
 		       int,  flags)
 {
-	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
-	struct filename *name = getname_flags(filename, lookup_flags, NULL);
-
-	return do_execveat(fd, name, argv, envp, flags, NULL);
+	return do_execveat(fd, filename, argv, envp, flags);
 }
 #endif /* CONFIG_X86_X32 */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index bed702e4b1fbd9..1e61c980c16354 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -134,9 +134,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
 
-int do_execveat(int fd, struct filename *filename,
-		const char __user *const __user *__argv,
-		const char __user *const __user *__envp,
-		int flags, struct file *file);
+int kernel_execveat(int fd, const char *filename, const char *const *argv,
+		const char *const *envp, int flags, struct file *file);
 
 #endif /* _LINUX_BINFMTS_H */
diff --git a/init/main.c b/init/main.c
index 838950ea7bca22..33de235dc2aa00 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1329,10 +1329,8 @@ static int run_init_process(const char *init_filename)
 	pr_debug("  with environment:\n");
 	for (p = envp_init; *p; p++)
 		pr_debug("    %s\n", *p);
-	return do_execveat(AT_FDCWD, getname_kernel(init_filename),
-			(const char __user *const __user *)argv_init,
-			(const char __user *const __user *)envp_init,
-			0, NULL);
+	return kernel_execveat(AT_FDCWD, init_filename, argv_init, envp_init, 0,
+			       NULL);
 }
 
 static int try_to_run_init_process(const char *init_filename)
diff --git a/kernel/umh.c b/kernel/umh.c
index 7aa9a5817582ca..1284823dbad338 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -103,11 +103,9 @@ static int call_usermodehelper_exec_async(void *data)
 	commit_creds(new);
 
 	sub_info->pid = task_pid_nr(current);
-	retval = do_execveat(AT_FDCWD,
-			sub_info->path ? getname_kernel(sub_info->path) : NULL,
-			(const char __user *const __user *)sub_info->argv,
-			(const char __user *const __user *)sub_info->envp,
-			0, sub_info->file);
+	retval = kernel_execveat(AT_FDCWD, sub_info->path,
+			(const char *const *)sub_info->argv,
+			(const char *const *)sub_info->envp, 0, sub_info->file);
 	if (sub_info->file && !retval)
 		current->flags |= PF_UMH;
 out:
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 6/6] kernel: add a kernel_wait helper
  2020-06-15 13:00 properly support exec and wait with kernel pointers Christoph Hellwig
                   ` (4 preceding siblings ...)
  2020-06-15 13:00 ` [PATCH 5/6] exec: add a kernel_execveat helper Christoph Hellwig
@ 2020-06-15 13:00 ` Christoph Hellwig
  2020-06-15 13:42 ` properly support exec and wait with kernel pointers Arnd Bergmann
  6 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 13:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Arnd Bergmann, Luis Chamberlain, linux-arm-kernel, x86,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-fsdevel, linux-arch, linux-kernel

Add a helper that waits for a pid and stores the status in the passed
in kernel pointer.  Use it to fix the usage of kernel_wait4 in
call_usermodehelper_exec_sync that only happens to work due to the
implicit set_fs(KERNEL_DS) for kernel threads.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/sched/task.h |  1 +
 kernel/exit.c              | 16 ++++++++++++++++
 kernel/umh.c               | 29 ++++-------------------------
 3 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 38359071236ad7..a80007df396e95 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -102,6 +102,7 @@ struct task_struct *fork_idle(int);
 struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
+int kernel_wait(pid_t pid, int *stat);
 
 extern void free_task(struct task_struct *tsk);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 727150f2810338..fd598846df0b17 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1626,6 +1626,22 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
 	return ret;
 }
 
+int kernel_wait(pid_t pid, int *stat)
+{
+	struct wait_opts wo = {
+		.wo_type	= PIDTYPE_PID,
+		.wo_pid		= find_get_pid(pid),
+		.wo_flags	= WEXITED,
+	};
+	int ret;
+
+	ret = do_wait(&wo);
+	if (ret > 0 && wo.wo_stat)
+		*stat = wo.wo_stat;
+	put_pid(wo.wo_pid);
+	return ret;
+}
+
 SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 		int, options, struct rusage __user *, ru)
 {
diff --git a/kernel/umh.c b/kernel/umh.c
index 1284823dbad338..6fd948e478bec4 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -126,37 +126,16 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
 {
 	pid_t pid;
 
-	/* If SIGCLD is ignored kernel_wait4 won't populate the status. */
+	/* If SIGCLD is ignored do_wait won't populate the status. */
 	kernel_sigaction(SIGCHLD, SIG_DFL);
 	pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
-	if (pid < 0) {
+	if (pid < 0)
 		sub_info->retval = pid;
-	} else {
-		int ret = -ECHILD;
-		/*
-		 * Normally it is bogus to call wait4() from in-kernel because
-		 * wait4() wants to write the exit code to a userspace address.
-		 * But call_usermodehelper_exec_sync() always runs as kernel
-		 * thread (workqueue) and put_user() to a kernel address works
-		 * OK for kernel threads, due to their having an mm_segment_t
-		 * which spans the entire address space.
-		 *
-		 * Thus the __user pointer cast is valid here.
-		 */
-		kernel_wait4(pid, (int __user *)&ret, 0, NULL);
-
-		/*
-		 * If ret is 0, either call_usermodehelper_exec_async failed and
-		 * the real error code is already in sub_info->retval or
-		 * sub_info->retval is 0 anyway, so don't mess with it then.
-		 */
-		if (ret)
-			sub_info->retval = ret;
-	}
+	else
+		kernel_wait(pid, &sub_info->retval);
 
 	/* Restore default kernel sig handler */
 	kernel_sigaction(SIGCHLD, SIG_IGN);
-
 	umh_complete(sub_info);
 }
 
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 13:00 ` [PATCH 2/6] exec: simplify the compat syscall handling Christoph Hellwig
@ 2020-06-15 13:31   ` Arnd Bergmann
  2020-06-15 14:12     ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Arnd Bergmann @ 2020-06-15 13:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Luis Chamberlain, Linux ARM, the arch/x86 maintainers,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, Linux FS-devel Mailing List, linux-arch,
	linux-kernel

On Mon, Jun 15, 2020 at 3:00 PM Christoph Hellwig <hch@lst.de> wrote:
>
> The only differenence betweeen the compat exec* syscalls and their
> native versions is that compat_ptr sign extension, and the fact that
> the pointer arithmetics for the two dimensional arrays needs to use
> the compat pointer size.  Instead of the compat wrappers and the
> struct user_arg_ptr machinery just use in_compat_syscall() to do the
> right thing for the compat case deep inside get_user_arg_ptr().
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Nice!

I have an older patch doing the same for sys_mount() somewhere,
but never got around to send that. One day we should see if we
can just do this for all of them.

> -
> -static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
> +static const char __user *
> +get_user_arg_ptr(const char __user *const __user *argv, int nr)
>  {
>         const char __user *native;
>
>  #ifdef CONFIG_COMPAT
> -       if (unlikely(argv.is_compat)) {
> +       if (in_compat_syscall()) {
> +               const compat_uptr_t __user *compat_argv =
> +                       compat_ptr((unsigned long)argv);
>                 compat_uptr_t compat;
>
> -               if (get_user(compat, argv.ptr.compat + nr))
> +               if (get_user(compat, compat_argv + nr))
>                         return ERR_PTR(-EFAULT);
>
>                 return compat_ptr(compat);
>         }
>  #endif

I would expect that the "#ifdef CONFIG_COMPAT" can be removed
now, since compat_ptr() and in_compat_syscall() are now defined
unconditionally. I have not tried that though.

> +/*
> + * x32 syscalls are listed in the same table as x86_64 ones, so we need to
> + * define compat syscalls that are exactly the same as the native version for
> + * the syscall table machinery to work.  Sigh..
> + */
> +#ifdef CONFIG_X86_X32
>  COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
> -       const compat_uptr_t __user *, argv,
> -       const compat_uptr_t __user *, envp)
> +                      const char __user *const __user *, argv,
> +                      const char __user *const __user *, envp)
>  {
> -       return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 0);
> +       return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
>  }

Maybe move it to arch/x86/kernel/process_64.c or arch/x86/entry/syscall_x32.c
to keep it out of the common code if this is needed. I don't really understand
the comment, why can't this just use this?

--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -379,7 +379,7 @@
 517    x32     recvfrom                compat_sys_recvfrom
 518    x32     sendmsg                 compat_sys_sendmsg
 519    x32     recvmsg                 compat_sys_recvmsg
-520    x32     execve                  compat_sys_execve
+520    x32     execve                  sys_execve
 521    x32     ptrace                  compat_sys_ptrace
 522    x32     rt_sigpending           compat_sys_rt_sigpending
 523    x32     rt_sigtimedwait         compat_sys_rt_sigtimedwait_time64
@@ -404,7 +404,7 @@
 542    x32     getsockopt              compat_sys_getsockopt
 543    x32     io_setup                compat_sys_io_setup
 544    x32     io_submit               compat_sys_io_submit
-545    x32     execveat                compat_sys_execveat
+545    x32     execveat                sys_execveat
 546    x32     preadv2                 compat_sys_preadv64v2
 547    x32     pwritev2                compat_sys_pwritev64v2
 548    x32     process_madvise         compat_sys_process_madvise

       Arnd

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: properly support exec and wait with kernel pointers
  2020-06-15 13:00 properly support exec and wait with kernel pointers Christoph Hellwig
                   ` (5 preceding siblings ...)
  2020-06-15 13:00 ` [PATCH 6/6] kernel: add a kernel_wait helper Christoph Hellwig
@ 2020-06-15 13:42 ` Arnd Bergmann
  6 siblings, 0 replies; 19+ messages in thread
From: Arnd Bergmann @ 2020-06-15 13:42 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Luis Chamberlain, Linux ARM, the arch/x86 maintainers,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, Linux FS-devel Mailing List, linux-arch,
	linux-kernel

On Mon, Jun 15, 2020 at 3:00 PM Christoph Hellwig <hch@lst.de> wrote:
>
> Hi all,
>
> this series first cleans up the exec code and then adds proper
> kernel_execveat and kernel_wait callers instead of relying on the fact
> that the early init code and kernel threads implicitly run with
> the address limit set to KERNEL_DS.
>
> Note that the cleanup removes the compat execve(at) handlers (almost)
> entirely, as we can handle the compat difference very nicely in a
> unified codebase.  The only exception is x86 where this would list the
> handlers twice in the same syscall table due to the messed up x32
> design.  I had to add an extra compat handler just for that case, but
> maybe someone has a better idea.

I looked at all the patches and I like it a lot. I replied with some suggestions
for x32, but maybe I misunderstood what its problem is, as I don't see
anything preventing us from having two entries in the x32 table pointing
to the same function.

       Arnd

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 13:31   ` Arnd Bergmann
@ 2020-06-15 14:12     ` Christoph Hellwig
  2020-06-15 14:40       ` Arnd Bergmann
  2020-06-15 14:48       ` Brian Gerst
  0 siblings, 2 replies; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 14:12 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Christoph Hellwig, Al Viro, Luis Chamberlain, Linux ARM,
	the arch/x86 maintainers, open list:BROADCOM NVRAM DRIVER,
	Parisc List, linuxppc-dev, linux-s390, sparclinux,
	Linux FS-devel Mailing List, linux-arch, linux-kernel

On Mon, Jun 15, 2020 at 03:31:35PM +0200, Arnd Bergmann wrote:
> >  #ifdef CONFIG_COMPAT
> > -       if (unlikely(argv.is_compat)) {
> > +       if (in_compat_syscall()) {
> > +               const compat_uptr_t __user *compat_argv =
> > +                       compat_ptr((unsigned long)argv);
> >                 compat_uptr_t compat;
> >
> > -               if (get_user(compat, argv.ptr.compat + nr))
> > +               if (get_user(compat, compat_argv + nr))
> >                         return ERR_PTR(-EFAULT);
> >
> >                 return compat_ptr(compat);
> >         }
> >  #endif
> 
> I would expect that the "#ifdef CONFIG_COMPAT" can be removed
> now, since compat_ptr() and in_compat_syscall() are now defined
> unconditionally. I have not tried that though.

True, I'll give it a spin.

> > +/*
> > + * x32 syscalls are listed in the same table as x86_64 ones, so we need to
> > + * define compat syscalls that are exactly the same as the native version for
> > + * the syscall table machinery to work.  Sigh..
> > + */
> > +#ifdef CONFIG_X86_X32
> >  COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
> > -       const compat_uptr_t __user *, argv,
> > -       const compat_uptr_t __user *, envp)
> > +                      const char __user *const __user *, argv,
> > +                      const char __user *const __user *, envp)
> >  {
> > -       return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 0);
> > +       return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
> >  }
> 
> Maybe move it to arch/x86/kernel/process_64.c or arch/x86/entry/syscall_x32.c
> to keep it out of the common code if this is needed.

I'd rather keep it in common code as that allows all the low-level
exec stuff to be marked static, and avoid us growing new pointless
compat variants through copy and paste.
smart compiler to d

> I don't really understand
> the comment, why can't this just use this?

That errors out with:

ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
`__x32_sys_execve'
ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
`__x32_sys_execveat'
make: *** [Makefile:1139: vmlinux] Error 1

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 14:12     ` Christoph Hellwig
@ 2020-06-15 14:40       ` Arnd Bergmann
  2020-06-15 14:43         ` Christoph Hellwig
  2020-06-15 14:48       ` Brian Gerst
  1 sibling, 1 reply; 19+ messages in thread
From: Arnd Bergmann @ 2020-06-15 14:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Luis Chamberlain, Linux ARM, the arch/x86 maintainers,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, Linux FS-devel Mailing List, linux-arch,
	linux-kernel

On Mon, Jun 15, 2020 at 4:12 PM Christoph Hellwig <hch@lst.de> wrote:
> On Mon, Jun 15, 2020 at 03:31:35PM +0200, Arnd Bergmann wrote:

>
> > I don't really understand
> > the comment, why can't this just use this?
>
> That errors out with:
>
> ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
> `__x32_sys_execve'
> ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
> `__x32_sys_execveat'
> make: *** [Makefile:1139: vmlinux] Error 1

Ah, I see: it's marked x32-only, so arch/x86/entry/syscall_x32.c
uses the __x32 prefix instead of the __x64 one. Marking it 'common'
instead would make it work, but also create an extra entry point
for native processes, something that commit
6365b842aae4 ("x86/syscalls: Split the x32 syscalls into their own table")
was trying to avoid.

      Arnd

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 14:40       ` Arnd Bergmann
@ 2020-06-15 14:43         ` Christoph Hellwig
  2020-06-15 14:46           ` Arnd Bergmann
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 14:43 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Christoph Hellwig, Al Viro, Luis Chamberlain, Linux ARM,
	the arch/x86 maintainers, open list:BROADCOM NVRAM DRIVER,
	Parisc List, linuxppc-dev, linux-s390, sparclinux,
	Linux FS-devel Mailing List, linux-arch, linux-kernel

On Mon, Jun 15, 2020 at 04:40:28PM +0200, Arnd Bergmann wrote:
> > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
> > `__x32_sys_execve'
> > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
> > `__x32_sys_execveat'
> > make: *** [Makefile:1139: vmlinux] Error 1
> 
> Ah, I see: it's marked x32-only, so arch/x86/entry/syscall_x32.c
> uses the __x32 prefix instead of the __x64 one. Marking it 'common'
> instead would make it work, but also create an extra entry point
> for native processes, something that commit
> 6365b842aae4 ("x86/syscalls: Split the x32 syscalls into their own table")
> was trying to avoid.

Marking it common also doesn't compile at all because __NR_execve
and __NR_execveat get redefined in unistd_64.h.  I then tried to rename
the x32 versions, which failed in yet another way.  At that point I gave
up instead of digging myself into a deeper hole..

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 14:43         ` Christoph Hellwig
@ 2020-06-15 14:46           ` Arnd Bergmann
  2020-06-15 15:09             ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Arnd Bergmann @ 2020-06-15 14:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Luis Chamberlain, Linux ARM, the arch/x86 maintainers,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, Linux FS-devel Mailing List, linux-arch,
	linux-kernel

On Mon, Jun 15, 2020 at 4:43 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Mon, Jun 15, 2020 at 04:40:28PM +0200, Arnd Bergmann wrote:
> > > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
> > > `__x32_sys_execve'
> > > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
> > > `__x32_sys_execveat'
> > > make: *** [Makefile:1139: vmlinux] Error 1
> >
> > Ah, I see: it's marked x32-only, so arch/x86/entry/syscall_x32.c
> > uses the __x32 prefix instead of the __x64 one. Marking it 'common'
> > instead would make it work, but also create an extra entry point
> > for native processes, something that commit
> > 6365b842aae4 ("x86/syscalls: Split the x32 syscalls into their own table")
> > was trying to avoid.
>
> Marking it common also doesn't compile at all because __NR_execve
> and __NR_execveat get redefined in unistd_64.h.  I then tried to rename
> the x32 versions, which failed in yet another way.  At that point I gave
> up instead of digging myself into a deeper hole..

How about this one:

diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 3d8d70d3896c..0ce15807cf54 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -16,6 +16,9 @@
 #undef __SYSCALL_X32
 #undef __SYSCALL_COMMON

+#define __x32_sys_execve __x64_sys_execve
+#define __x32_sys_execveat __x64_sys_execveat
+
 #define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
 #define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,

Still ugly, but much simpler and more localized (if it works).

        Arnd

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 14:12     ` Christoph Hellwig
  2020-06-15 14:40       ` Arnd Bergmann
@ 2020-06-15 14:48       ` Brian Gerst
  2020-06-15 18:47         ` Arnd Bergmann
  1 sibling, 1 reply; 19+ messages in thread
From: Brian Gerst @ 2020-06-15 14:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Arnd Bergmann, Al Viro, Luis Chamberlain, Linux ARM,
	the arch/x86 maintainers, open list:BROADCOM NVRAM DRIVER,
	Parisc List, linuxppc-dev, linux-s390, sparclinux,
	Linux FS-devel Mailing List, linux-arch, linux-kernel

On Mon, Jun 15, 2020 at 10:13 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Mon, Jun 15, 2020 at 03:31:35PM +0200, Arnd Bergmann wrote:
> > >  #ifdef CONFIG_COMPAT
> > > -       if (unlikely(argv.is_compat)) {
> > > +       if (in_compat_syscall()) {
> > > +               const compat_uptr_t __user *compat_argv =
> > > +                       compat_ptr((unsigned long)argv);
> > >                 compat_uptr_t compat;
> > >
> > > -               if (get_user(compat, argv.ptr.compat + nr))
> > > +               if (get_user(compat, compat_argv + nr))
> > >                         return ERR_PTR(-EFAULT);
> > >
> > >                 return compat_ptr(compat);
> > >         }
> > >  #endif
> >
> > I would expect that the "#ifdef CONFIG_COMPAT" can be removed
> > now, since compat_ptr() and in_compat_syscall() are now defined
> > unconditionally. I have not tried that though.
>
> True, I'll give it a spin.
>
> > > +/*
> > > + * x32 syscalls are listed in the same table as x86_64 ones, so we need to
> > > + * define compat syscalls that are exactly the same as the native version for
> > > + * the syscall table machinery to work.  Sigh..
> > > + */
> > > +#ifdef CONFIG_X86_X32
> > >  COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
> > > -       const compat_uptr_t __user *, argv,
> > > -       const compat_uptr_t __user *, envp)
> > > +                      const char __user *const __user *, argv,
> > > +                      const char __user *const __user *, envp)
> > >  {
> > > -       return do_compat_execve(AT_FDCWD, getname(filename), argv, envp, 0);
> > > +       return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
> > >  }
> >
> > Maybe move it to arch/x86/kernel/process_64.c or arch/x86/entry/syscall_x32.c
> > to keep it out of the common code if this is needed.
>
> I'd rather keep it in common code as that allows all the low-level
> exec stuff to be marked static, and avoid us growing new pointless
> compat variants through copy and paste.
> smart compiler to d
>
> > I don't really understand
> > the comment, why can't this just use this?
>
> That errors out with:
>
> ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
> `__x32_sys_execve'
> ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
> `__x32_sys_execveat'
> make: *** [Makefile:1139: vmlinux] Error 1

I think I have a fix for this, by modifying the syscall wrappers to
add an alias for the __x32 variant to the native __x64_sys_foo().
I'll get back to you with a patch.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 14:46           ` Arnd Bergmann
@ 2020-06-15 15:09             ` Christoph Hellwig
  2020-06-15 15:33               ` Brian Gerst
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 15:09 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Christoph Hellwig, Al Viro, Luis Chamberlain, Linux ARM,
	the arch/x86 maintainers, open list:BROADCOM NVRAM DRIVER,
	Parisc List, linuxppc-dev, linux-s390, sparclinux,
	Linux FS-devel Mailing List, linux-arch, linux-kernel

On Mon, Jun 15, 2020 at 04:46:15PM +0200, Arnd Bergmann wrote:
> How about this one:
> 
> diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
> index 3d8d70d3896c..0ce15807cf54 100644
> --- a/arch/x86/entry/syscall_x32.c
> +++ b/arch/x86/entry/syscall_x32.c
> @@ -16,6 +16,9 @@
>  #undef __SYSCALL_X32
>  #undef __SYSCALL_COMMON
> 
> +#define __x32_sys_execve __x64_sys_execve
> +#define __x32_sys_execveat __x64_sys_execveat
> +


arch/x86/entry/syscall_x32.c:19:26: error: ‘__x64_sys_execve’ undeclared here (not in a function); did you mean ‘__x32_sys_execve’?
   19 | #define __x32_sys_execve __x64_sys_execve
      |                          ^~~~~~~~~~~~~~~~
arch/x86/entry/syscall_x32.c:22:39: note: in expansion of macro ‘__x32_sys_execve’
   22 | #define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
      |                                       ^~~~~~
./arch/x86/include/generated/asm/syscalls_64.h:344:1: note: in expansion of macro ‘__SYSCALL_X32’
  344 | __SYSCALL_X32(520, sys_execve)
      | ^~~~~~~~~~~~~
arch/x86/entry/syscall_x32.c:20:28: error: ‘__x64_sys_execveat’ undeclared here (not in a function); did you mean ‘__x32_sys_execveat’?
   20 | #define __x32_sys_execveat __x64_sys_execveat
      |                            ^~~~~~~~~~~~~~~~~~
arch/x86/entry/syscall_x32.c:22:39: note: in expansion of macro ‘__x32_sys_execveat’
   22 | #define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
      |                                       ^~~~~~
./arch/x86/include/generated/asm/syscalls_64.h:369:1: note: in expansion of macro ‘__SYSCALL_X32’
  369 | __SYSCALL_X32(545, sys_execveat)
      | ^~~~~~~~~~~~~
make[2]: *** [scripts/Makefile.build:281: arch/x86/entry/syscall_x32.o] Error 1
make[1]: *** [scripts/Makefile.build:497: arch/x86/entry] Error 2
make[1]: *** Waiting for unfinished jobs....
kernel/exit.o: warning: objtool: __x64_sys_exit_group()+0x14: unreachable instruction
make: *** [Makefile:1764: arch/x86] Error 2
make: *** Waiting for unfinished jobs....

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 15:09             ` Christoph Hellwig
@ 2020-06-15 15:33               ` Brian Gerst
  2020-06-15 16:41                 ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Brian Gerst @ 2020-06-15 15:33 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Arnd Bergmann, Al Viro, Luis Chamberlain, Linux ARM,
	the arch/x86 maintainers, open list:BROADCOM NVRAM DRIVER,
	Parisc List, linuxppc-dev, linux-s390, sparclinux,
	Linux FS-devel Mailing List, linux-arch, linux-kernel

On Mon, Jun 15, 2020 at 11:10 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Mon, Jun 15, 2020 at 04:46:15PM +0200, Arnd Bergmann wrote:
> > How about this one:
> >
> > diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
> > index 3d8d70d3896c..0ce15807cf54 100644
> > --- a/arch/x86/entry/syscall_x32.c
> > +++ b/arch/x86/entry/syscall_x32.c
> > @@ -16,6 +16,9 @@
> >  #undef __SYSCALL_X32
> >  #undef __SYSCALL_COMMON
> >
> > +#define __x32_sys_execve __x64_sys_execve
> > +#define __x32_sys_execveat __x64_sys_execveat
> > +
>
>
> arch/x86/entry/syscall_x32.c:19:26: error: ‘__x64_sys_execve’ undeclared here (not in a function); did you mean ‘__x32_sys_execve’?
>    19 | #define __x32_sys_execve __x64_sys_execve
>       |                          ^~~~~~~~~~~~~~~~
> arch/x86/entry/syscall_x32.c:22:39: note: in expansion of macro ‘__x32_sys_execve’
>    22 | #define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
>       |                                       ^~~~~~
> ./arch/x86/include/generated/asm/syscalls_64.h:344:1: note: in expansion of macro ‘__SYSCALL_X32’
>   344 | __SYSCALL_X32(520, sys_execve)
>       | ^~~~~~~~~~~~~
> arch/x86/entry/syscall_x32.c:20:28: error: ‘__x64_sys_execveat’ undeclared here (not in a function); did you mean ‘__x32_sys_execveat’?
>    20 | #define __x32_sys_execveat __x64_sys_execveat
>       |                            ^~~~~~~~~~~~~~~~~~
> arch/x86/entry/syscall_x32.c:22:39: note: in expansion of macro ‘__x32_sys_execveat’
>    22 | #define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
>       |                                       ^~~~~~
> ./arch/x86/include/generated/asm/syscalls_64.h:369:1: note: in expansion of macro ‘__SYSCALL_X32’
>   369 | __SYSCALL_X32(545, sys_execveat)
>       | ^~~~~~~~~~~~~
> make[2]: *** [scripts/Makefile.build:281: arch/x86/entry/syscall_x32.o] Error 1
> make[1]: *** [scripts/Makefile.build:497: arch/x86/entry] Error 2
> make[1]: *** Waiting for unfinished jobs....
> kernel/exit.o: warning: objtool: __x64_sys_exit_group()+0x14: unreachable instruction
> make: *** [Makefile:1764: arch/x86] Error 2
> make: *** Waiting for unfinished jobs....

If you move those aliases above all the __SYSCALL_* defines it will
work, since that will get the forward declaration too.  This would be
the simplest workaround.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 15:33               ` Brian Gerst
@ 2020-06-15 16:41                 ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2020-06-15 16:41 UTC (permalink / raw)
  To: Brian Gerst
  Cc: Christoph Hellwig, Arnd Bergmann, Al Viro, Luis Chamberlain,
	Linux ARM, the arch/x86 maintainers,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, Linux FS-devel Mailing List, linux-arch,
	linux-kernel

On Mon, Jun 15, 2020 at 11:33:49AM -0400, Brian Gerst wrote:
> If you move those aliases above all the __SYSCALL_* defines it will
> work, since that will get the forward declaration too.  This would be
> the simplest workaround.

That compiles and also passes my exaustive x32 tests (chroot + ls -l).

This is the updated version:

http://git.infradead.org/users/hch/misc.git/commitdiff/c8d319711ad2f53be003ae8e9be08519068bdcee

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 14:48       ` Brian Gerst
@ 2020-06-15 18:47         ` Arnd Bergmann
  2020-06-15 19:45           ` Brian Gerst
  0 siblings, 1 reply; 19+ messages in thread
From: Arnd Bergmann @ 2020-06-15 18:47 UTC (permalink / raw)
  To: Brian Gerst
  Cc: Christoph Hellwig, Al Viro, Luis Chamberlain, Linux ARM,
	the arch/x86 maintainers, open list:BROADCOM NVRAM DRIVER,
	Parisc List, linuxppc-dev, linux-s390, sparclinux,
	Linux FS-devel Mailing List, linux-arch, linux-kernel

On Mon, Jun 15, 2020 at 4:48 PM Brian Gerst <brgerst@gmail.com> wrote:
> On Mon, Jun 15, 2020 at 10:13 AM Christoph Hellwig <hch@lst.de> wrote:
> > On Mon, Jun 15, 2020 at 03:31:35PM +0200, Arnd Bergmann wrote:

> >
> > I'd rather keep it in common code as that allows all the low-level
> > exec stuff to be marked static, and avoid us growing new pointless
> > compat variants through copy and paste.
> > smart compiler to d
> >
> > > I don't really understand
> > > the comment, why can't this just use this?
> >
> > That errors out with:
> >
> > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
> > `__x32_sys_execve'
> > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
> > `__x32_sys_execveat'
> > make: *** [Makefile:1139: vmlinux] Error 1
>
> I think I have a fix for this, by modifying the syscall wrappers to
> add an alias for the __x32 variant to the native __x64_sys_foo().
> I'll get back to you with a patch.

Do we actually need the __x32 prefix any more, or could we just
change all x32 specific calls to use __x64_compat_sys_foo()?

      Arnd

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] exec: simplify the compat syscall handling
  2020-06-15 18:47         ` Arnd Bergmann
@ 2020-06-15 19:45           ` Brian Gerst
  0 siblings, 0 replies; 19+ messages in thread
From: Brian Gerst @ 2020-06-15 19:45 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Christoph Hellwig, Al Viro, Luis Chamberlain, Linux ARM,
	the arch/x86 maintainers, open list:BROADCOM NVRAM DRIVER,
	Parisc List, linuxppc-dev, linux-s390, sparclinux,
	Linux FS-devel Mailing List, linux-arch, linux-kernel

On Mon, Jun 15, 2020 at 2:47 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Mon, Jun 15, 2020 at 4:48 PM Brian Gerst <brgerst@gmail.com> wrote:
> > On Mon, Jun 15, 2020 at 10:13 AM Christoph Hellwig <hch@lst.de> wrote:
> > > On Mon, Jun 15, 2020 at 03:31:35PM +0200, Arnd Bergmann wrote:
>
> > >
> > > I'd rather keep it in common code as that allows all the low-level
> > > exec stuff to be marked static, and avoid us growing new pointless
> > > compat variants through copy and paste.
> > > smart compiler to d
> > >
> > > > I don't really understand
> > > > the comment, why can't this just use this?
> > >
> > > That errors out with:
> > >
> > > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1040): undefined reference to
> > > `__x32_sys_execve'
> > > ld: arch/x86/entry/syscall_x32.o:(.rodata+0x1108): undefined reference to
> > > `__x32_sys_execveat'
> > > make: *** [Makefile:1139: vmlinux] Error 1
> >
> > I think I have a fix for this, by modifying the syscall wrappers to
> > add an alias for the __x32 variant to the native __x64_sys_foo().
> > I'll get back to you with a patch.
>
> Do we actually need the __x32 prefix any more, or could we just
> change all x32 specific calls to use __x64_compat_sys_foo()?

I suppose that would work too.  The prefix really describes the
register mapping.

--
Brian Gerst

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2020-06-15 19:45 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-15 13:00 properly support exec and wait with kernel pointers Christoph Hellwig
2020-06-15 13:00 ` [PATCH 1/6] exec: cleanup the execve wrappers Christoph Hellwig
2020-06-15 13:00 ` [PATCH 2/6] exec: simplify the compat syscall handling Christoph Hellwig
2020-06-15 13:31   ` Arnd Bergmann
2020-06-15 14:12     ` Christoph Hellwig
2020-06-15 14:40       ` Arnd Bergmann
2020-06-15 14:43         ` Christoph Hellwig
2020-06-15 14:46           ` Arnd Bergmann
2020-06-15 15:09             ` Christoph Hellwig
2020-06-15 15:33               ` Brian Gerst
2020-06-15 16:41                 ` Christoph Hellwig
2020-06-15 14:48       ` Brian Gerst
2020-06-15 18:47         ` Arnd Bergmann
2020-06-15 19:45           ` Brian Gerst
2020-06-15 13:00 ` [PATCH 3/6] exec: cleanup the count() function Christoph Hellwig
2020-06-15 13:00 ` [PATCH 4/6] exec: split prepare_arg_pages Christoph Hellwig
2020-06-15 13:00 ` [PATCH 5/6] exec: add a kernel_execveat helper Christoph Hellwig
2020-06-15 13:00 ` [PATCH 6/6] kernel: add a kernel_wait helper Christoph Hellwig
2020-06-15 13:42 ` properly support exec and wait with kernel pointers Arnd Bergmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).