linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin()
@ 2020-01-23 12:59 Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not Christophe Leroy
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Christophe Leroy @ 2020-01-23 12:59 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Linus Torvalds, Alexander Viro, Andrew Morton
  Cc: linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm

Some architectures grant full access to userspace regardless of the
address/len passed to user_access_begin(), but other architectures
only grant access to the requested area.

For example, on 32 bits powerpc (book3s/32), access is granted by
segments of 256 Mbytes.

Modify filldir() and filldir64() to request the real area they need
to get access to, i.e. the area covering the parent dirent (if any)
and the contiguous current dirent.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 9f79b78ef744 ("Convert filldir[64]() from __put_user() to unsafe_put_user()")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v2: have user_access_begin() cover both parent dirent (if any) and current dirent
v3: replaced by patch from Linus
---
 fs/readdir.c | 70 +++++++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/fs/readdir.c b/fs/readdir.c
index d26d5ea4de7b..4b466cbb0f3a 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -206,7 +206,7 @@ struct linux_dirent {
 struct getdents_callback {
 	struct dir_context ctx;
 	struct linux_dirent __user * current_dir;
-	struct linux_dirent __user * previous;
+	int prev_reclen;
 	int count;
 	int error;
 };
@@ -214,12 +214,13 @@ struct getdents_callback {
 static int filldir(struct dir_context *ctx, const char *name, int namlen,
 		   loff_t offset, u64 ino, unsigned int d_type)
 {
-	struct linux_dirent __user * dirent;
+	struct linux_dirent __user *dirent, *prev;
 	struct getdents_callback *buf =
 		container_of(ctx, struct getdents_callback, ctx);
 	unsigned long d_ino;
 	int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
 		sizeof(long));
+	int prev_reclen;
 
 	buf->error = verify_dirent_name(name, namlen);
 	if (unlikely(buf->error))
@@ -232,28 +233,24 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 		buf->error = -EOVERFLOW;
 		return -EOVERFLOW;
 	}
-	dirent = buf->previous;
-	if (dirent && signal_pending(current))
+	prev_reclen = buf->prev_reclen;
+	if (prev_reclen && signal_pending(current))
 		return -EINTR;
-
-	/*
-	 * Note! This range-checks 'previous' (which may be NULL).
-	 * The real range was checked in getdents
-	 */
-	if (!user_access_begin(dirent, sizeof(*dirent)))
-		goto efault;
-	if (dirent)
-		unsafe_put_user(offset, &dirent->d_off, efault_end);
 	dirent = buf->current_dir;
+	prev = (void __user *)dirent - prev_reclen;
+	if (!user_access_begin(prev, reclen + prev_reclen))
+		goto efault;
+
+	/* This might be 'dirent->d_off', but if so it will get overwritten */
+	unsafe_put_user(offset, &prev->d_off, efault_end);
 	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
 	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
 	unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
 	user_access_end();
 
-	buf->previous = dirent;
-	dirent = (void __user *)dirent + reclen;
-	buf->current_dir = dirent;
+	buf->current_dir = (void __user *)dirent + reclen;
+	buf->prev_reclen = reclen;
 	buf->count -= reclen;
 	return 0;
 efault_end:
@@ -267,7 +264,6 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
 		struct linux_dirent __user *, dirent, unsigned int, count)
 {
 	struct fd f;
-	struct linux_dirent __user * lastdirent;
 	struct getdents_callback buf = {
 		.ctx.actor = filldir,
 		.count = count,
@@ -285,8 +281,10 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
 	error = iterate_dir(f.file, &buf.ctx);
 	if (error >= 0)
 		error = buf.error;
-	lastdirent = buf.previous;
-	if (lastdirent) {
+	if (buf.prev_reclen) {
+		struct linux_dirent __user *lastdirent;
+		lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
+
 		if (put_user(buf.ctx.pos, &lastdirent->d_off))
 			error = -EFAULT;
 		else
@@ -299,7 +297,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
 struct getdents_callback64 {
 	struct dir_context ctx;
 	struct linux_dirent64 __user * current_dir;
-	struct linux_dirent64 __user * previous;
+	int prev_reclen;
 	int count;
 	int error;
 };
@@ -307,11 +305,12 @@ struct getdents_callback64 {
 static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 		     loff_t offset, u64 ino, unsigned int d_type)
 {
-	struct linux_dirent64 __user *dirent;
+	struct linux_dirent64 __user *dirent, *prev;
 	struct getdents_callback64 *buf =
 		container_of(ctx, struct getdents_callback64, ctx);
 	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
 		sizeof(u64));
+	int prev_reclen;
 
 	buf->error = verify_dirent_name(name, namlen);
 	if (unlikely(buf->error))
@@ -319,30 +318,28 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	buf->error = -EINVAL;	/* only used if we fail.. */
 	if (reclen > buf->count)
 		return -EINVAL;
-	dirent = buf->previous;
-	if (dirent && signal_pending(current))
+	prev_reclen = buf->prev_reclen;
+	if (prev_reclen && signal_pending(current))
 		return -EINTR;
-
-	/*
-	 * Note! This range-checks 'previous' (which may be NULL).
-	 * The real range was checked in getdents
-	 */
-	if (!user_access_begin(dirent, sizeof(*dirent)))
-		goto efault;
-	if (dirent)
-		unsafe_put_user(offset, &dirent->d_off, efault_end);
 	dirent = buf->current_dir;
+	prev = (void __user *)dirent - prev_reclen;
+	if (!user_access_begin(prev, reclen + prev_reclen))
+		goto efault;
+
+	/* This might be 'dirent->d_off', but if so it will get overwritten */
+	unsafe_put_user(offset, &prev->d_off, efault_end);
 	unsafe_put_user(ino, &dirent->d_ino, efault_end);
 	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
 	unsafe_put_user(d_type, &dirent->d_type, efault_end);
 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
 	user_access_end();
 
-	buf->previous = dirent;
+	buf->prev_reclen = reclen;
 	dirent = (void __user *)dirent + reclen;
 	buf->current_dir = dirent;
 	buf->count -= reclen;
 	return 0;
+
 efault_end:
 	user_access_end();
 efault:
@@ -354,7 +351,6 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
 		    unsigned int count)
 {
 	struct fd f;
-	struct linux_dirent64 __user * lastdirent;
 	struct getdents_callback64 buf = {
 		.ctx.actor = filldir64,
 		.count = count,
@@ -372,9 +368,11 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
 	error = iterate_dir(f.file, &buf.ctx);
 	if (error >= 0)
 		error = buf.error;
-	lastdirent = buf.previous;
-	if (lastdirent) {
+	if (buf.prev_reclen) {
+		struct linux_dirent64 __user *lastdirent;
 		typeof(lastdirent->d_off) d_off = buf.ctx.pos;
+
+		lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
 		if (__put_user(d_off, &lastdirent->d_off))
 			error = -EFAULT;
 		else
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not
  2020-01-23 12:59 [PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin() Christophe Leroy
@ 2020-01-23 12:59 ` Christophe Leroy
  2020-01-23 13:11   ` Jani Nikula
                     ` (2 more replies)
  2020-01-23 12:59 ` [PATCH v3 3/7] powerpc/32s: Fix bad_kuap_fault() Christophe Leroy
                   ` (4 subsequent siblings)
  5 siblings, 3 replies; 14+ messages in thread
From: Christophe Leroy @ 2020-01-23 12:59 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Linus Torvalds, Alexander Viro, Andrew Morton, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, H. Peter Anvin, Jani Nikula
  Cc: linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm, dri-devel, x86

On 32 bits powerPC (book3s/32), only write accesses to user are
protected and there is no point spending time on unlocking for reads.

On 64 bits powerpc (book3s/64 at least), access can be granted
read only, write only or read/write.

Add an argument to user_access_begin() to tell when it's for write and
return an opaque key that will be used by user_access_end() to know
what was done by user_access_begin().

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v3: new
---
 arch/x86/include/asm/uaccess.h                 |  5 +++--
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 15 ++++++++++-----
 fs/readdir.c                                   | 16 ++++++++++------
 include/linux/uaccess.h                        |  4 ++--
 kernel/compat.c                                | 16 ++++++++++------
 kernel/exit.c                                  | 17 +++++++++++------
 lib/strncpy_from_user.c                        |  6 ++++--
 lib/strnlen_user.c                             |  6 ++++--
 lib/usercopy.c                                 |  8 +++++---
 9 files changed, 59 insertions(+), 34 deletions(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 61d93f062a36..05eccdc0366a 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -709,7 +709,8 @@ extern struct movsl_mask {
  * checking before using them, but you have to surround them with the
  * user_access_begin/end() pair.
  */
-static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
+static __must_check __always_inline unsigned long
+user_access_begin(const void __user *ptr, size_t len, bool write)
 {
 	if (unlikely(!access_ok(ptr,len)))
 		return 0;
@@ -717,7 +718,7 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt
 	return 1;
 }
 #define user_access_begin(a,b)	user_access_begin(a,b)
-#define user_access_end()	__uaccess_end()
+#define user_access_end(x)	__uaccess_end()
 
 #define user_access_save()	smap_save()
 #define user_access_restore(x)	smap_restore(x)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index bc3a67226163..509bfb6116ac 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1615,6 +1615,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
 	const unsigned int count = eb->buffer_count;
 	unsigned int i;
 	int err;
+	unsigned long key;
 
 	for (i = 0; i < count; i++) {
 		const unsigned int nreloc = eb->exec[i].relocation_count;
@@ -1662,14 +1663,15 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
 		 * happened we would make the mistake of assuming that the
 		 * relocations were valid.
 		 */
-		if (!user_access_begin(urelocs, size))
+		key = user_access_begin(urelocs, size, true);
+		if (!key)
 			goto end;
 
 		for (copied = 0; copied < nreloc; copied++)
 			unsafe_put_user(-1,
 					&urelocs[copied].presumed_offset,
 					end_user);
-		user_access_end();
+		user_access_end(key);
 
 		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
 	}
@@ -1677,7 +1679,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
 	return 0;
 
 end_user:
-	user_access_end();
+	user_access_end(key);
 end:
 	kvfree(relocs);
 	err = -EFAULT;
@@ -2906,6 +2908,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
 			u64_to_user_ptr(args->buffers_ptr);
 		unsigned int i;
+		unsigned long key;
 
 		/* Copy the new buffer offsets back to the user's exec list. */
 		/*
@@ -2915,7 +2918,9 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		 * And this range already got effectively checked earlier
 		 * when we did the "copy_from_user()" above.
 		 */
-		if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
+		key = user_access_begin(user_exec_list,
+					count * sizeof(*user_exec_list), true);
+		if (!key)
 			goto end;
 
 		for (i = 0; i < args->buffer_count; i++) {
@@ -2929,7 +2934,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 					end_user);
 		}
 end_user:
-		user_access_end();
+		user_access_end(key);
 end:;
 	}
 
diff --git a/fs/readdir.c b/fs/readdir.c
index 4b466cbb0f3a..47b9ef97e16e 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -221,6 +221,7 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 	int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
 		sizeof(long));
 	int prev_reclen;
+	unsigned long key;
 
 	buf->error = verify_dirent_name(name, namlen);
 	if (unlikely(buf->error))
@@ -238,7 +239,8 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 		return -EINTR;
 	dirent = buf->current_dir;
 	prev = (void __user *)dirent - prev_reclen;
-	if (!user_access_begin(prev, reclen + prev_reclen))
+	key = user_access_begin(prev, reclen + prev_reclen, true);
+	if (!key)
 		goto efault;
 
 	/* This might be 'dirent->d_off', but if so it will get overwritten */
@@ -247,14 +249,14 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
 	unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
-	user_access_end();
+	user_access_end(key);
 
 	buf->current_dir = (void __user *)dirent + reclen;
 	buf->prev_reclen = reclen;
 	buf->count -= reclen;
 	return 0;
 efault_end:
-	user_access_end();
+	user_access_end(key);
 efault:
 	buf->error = -EFAULT;
 	return -EFAULT;
@@ -311,6 +313,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
 		sizeof(u64));
 	int prev_reclen;
+	unsigned long key;
 
 	buf->error = verify_dirent_name(name, namlen);
 	if (unlikely(buf->error))
@@ -323,7 +326,8 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 		return -EINTR;
 	dirent = buf->current_dir;
 	prev = (void __user *)dirent - prev_reclen;
-	if (!user_access_begin(prev, reclen + prev_reclen))
+	key = user_access_begin(prev, reclen + prev_reclen, true);
+	if (!key)
 		goto efault;
 
 	/* This might be 'dirent->d_off', but if so it will get overwritten */
@@ -332,7 +336,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
 	unsafe_put_user(d_type, &dirent->d_type, efault_end);
 	unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
-	user_access_end();
+	user_access_end(key);
 
 	buf->prev_reclen = reclen;
 	dirent = (void __user *)dirent + reclen;
@@ -341,7 +345,7 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	return 0;
 
 efault_end:
-	user_access_end();
+	user_access_end(key);
 efault:
 	buf->error = -EFAULT;
 	return -EFAULT;
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 67f016010aad..394f5029a727 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -369,8 +369,8 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
 	probe_kernel_read(&retval, addr, sizeof(retval))
 
 #ifndef user_access_begin
-#define user_access_begin(ptr,len) access_ok(ptr, len)
-#define user_access_end() do { } while (0)
+#define user_access_begin(ptr, len, write) access_ok(ptr, len)
+#define user_access_end(x) do { } while (0)
 #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
 #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e)
 #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e)
diff --git a/kernel/compat.c b/kernel/compat.c
index 95005f849c68..4bcbe1cd761b 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -258,12 +258,14 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 		       unsigned long bitmap_size)
 {
 	unsigned long nr_compat_longs;
+	unsigned long key;
 
 	/* align bitmap up to nearest compat_long_t boundary */
 	bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
 	nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
 
-	if (!user_access_begin(umask, bitmap_size / 8))
+	key = user_access_begin(umask, bitmap_size / 8, false);
+	if (!key)
 		return -EFAULT;
 
 	while (nr_compat_longs > 1) {
@@ -275,11 +277,11 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 	}
 	if (nr_compat_longs)
 		unsafe_get_user(*mask, umask++, Efault);
-	user_access_end();
+	user_access_end(key);
 	return 0;
 
 Efault:
-	user_access_end();
+	user_access_end(key);
 	return -EFAULT;
 }
 
@@ -287,12 +289,14 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size)
 {
 	unsigned long nr_compat_longs;
+	unsigned long key;
 
 	/* align bitmap up to nearest compat_long_t boundary */
 	bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
 	nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
 
-	if (!user_access_begin(umask, bitmap_size / 8))
+	key = user_access_begin(umask, bitmap_size / 8, true);
+	if (!key)
 		return -EFAULT;
 
 	while (nr_compat_longs > 1) {
@@ -303,10 +307,10 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 	}
 	if (nr_compat_longs)
 		unsafe_put_user((compat_ulong_t)*mask, umask++, Efault);
-	user_access_end();
+	user_access_end(key);
 	return 0;
 Efault:
-	user_access_end();
+	user_access_end(key);
 	return -EFAULT;
 }
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 2833ffb0c211..1cb9c8a879d2 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1553,6 +1553,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 	struct waitid_info info = {.status = 0};
 	long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
 	int signo = 0;
+	unsigned long key;
 
 	if (err > 0) {
 		signo = SIGCHLD;
@@ -1563,7 +1564,8 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 	if (!infop)
 		return err;
 
-	if (!user_access_begin(infop, sizeof(*infop)))
+	key = user_access_begin(infop, sizeof(*infop), true);
+	if (!key)
 		return -EFAULT;
 
 	unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1572,10 +1574,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 	unsafe_put_user(info.pid, &infop->si_pid, Efault);
 	unsafe_put_user(info.uid, &infop->si_uid, Efault);
 	unsafe_put_user(info.status, &infop->si_status, Efault);
-	user_access_end();
+	user_access_end(key);
 	return err;
 Efault:
-	user_access_end();
+	user_access_end(key);
 	return -EFAULT;
 }
 
@@ -1673,6 +1675,8 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 	struct waitid_info info = {.status = 0};
 	long err = kernel_waitid(which, pid, &info, options, uru ? &ru : NULL);
 	int signo = 0;
+	unsigned long key;
+
 	if (err > 0) {
 		signo = SIGCHLD;
 		err = 0;
@@ -1690,7 +1694,8 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 	if (!infop)
 		return err;
 
-	if (!user_access_begin(infop, sizeof(*infop)))
+	key = user_access_begin(infop, sizeof(*infop), true);
+	if (!key)
 		return -EFAULT;
 
 	unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1699,10 +1704,10 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 	unsafe_put_user(info.pid, &infop->si_pid, Efault);
 	unsafe_put_user(info.uid, &infop->si_uid, Efault);
 	unsafe_put_user(info.status, &infop->si_status, Efault);
-	user_access_end();
+	user_access_end(key);
 	return err;
 Efault:
-	user_access_end();
+	user_access_end(key);
 	return -EFAULT;
 }
 #endif
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index dccb95af6003..7184fb766439 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -113,12 +113,14 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
 	if (likely(src_addr < max_addr)) {
 		unsigned long max = max_addr - src_addr;
 		long retval;
+		unsigned long key;
 
 		kasan_check_write(dst, count);
 		check_object_size(dst, count, false);
-		if (user_access_begin(src, max)) {
+		key = user_access_begin(src, max, false);
+		if (key) {
 			retval = do_strncpy_from_user(dst, src, count, max);
-			user_access_end();
+			user_access_end(key);
 			return retval;
 		}
 	}
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 6c0005d5dd5c..819e355b8608 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -108,10 +108,12 @@ long strnlen_user(const char __user *str, long count)
 	if (likely(src_addr < max_addr)) {
 		unsigned long max = max_addr - src_addr;
 		long retval;
+		unsigned long key;
 
-		if (user_access_begin(str, max)) {
+		key = user_access_begin(str, max, false);
+		if (key) {
 			retval = do_strnlen_user(str, count, max);
-			user_access_end();
+			user_access_end(key);
 			return retval;
 		}
 	}
diff --git a/lib/usercopy.c b/lib/usercopy.c
index cbb4d9ec00f2..9e03ca88ad32 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -51,6 +51,7 @@ int check_zeroed_user(const void __user *from, size_t size)
 {
 	unsigned long val;
 	uintptr_t align = (uintptr_t) from % sizeof(unsigned long);
+	unsigned long key;
 
 	if (unlikely(size == 0))
 		return 1;
@@ -58,7 +59,8 @@ int check_zeroed_user(const void __user *from, size_t size)
 	from -= align;
 	size += align;
 
-	if (!user_access_begin(from, size))
+	key = user_access_begin(from, size, false);
+	if (!key)
 		return -EFAULT;
 
 	unsafe_get_user(val, (unsigned long __user *) from, err_fault);
@@ -79,10 +81,10 @@ int check_zeroed_user(const void __user *from, size_t size)
 		val &= aligned_byte_mask(size);
 
 done:
-	user_access_end();
+	user_access_end(key);
 	return (val == 0);
 err_fault:
-	user_access_end();
+	user_access_end(key);
 	return -EFAULT;
 }
 EXPORT_SYMBOL(check_zeroed_user);
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v3 3/7] powerpc/32s: Fix bad_kuap_fault()
  2020-01-23 12:59 [PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin() Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not Christophe Leroy
@ 2020-01-23 12:59 ` Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 4/7] powerpc/kuap: Fix set direction in allow/prevent_user_access() Christophe Leroy
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Christophe Leroy @ 2020-01-23 12:59 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm

At the moment, bad_kuap_fault() reports a fault only if a bad access
to userspace occurred while access to userspace was not granted.

But if a fault occurs for a write outside the allowed userspace
segment(s) that have been unlocked, bad_kuap_fault() fails to
detect it and the kernel loops forever in do_page_fault().

Fix it by checking that the accessed address is within the allowed
range.

Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v2: added missing address parametre to bad_kuap_fault() in asm/kup.h
v3: no change
---
 arch/powerpc/include/asm/book3s/32/kup.h       | 9 +++++++--
 arch/powerpc/include/asm/book3s/64/kup-radix.h | 3 ++-
 arch/powerpc/include/asm/kup.h                 | 6 +++++-
 arch/powerpc/include/asm/nohash/32/kup-8xx.h   | 3 ++-
 arch/powerpc/mm/fault.c                        | 2 +-
 5 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index f9dc597b0b86..d88008c8eb85 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -131,12 +131,17 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
 	kuap_update_sr(mfsrin(addr) | SR_KS, addr, end);	/* set Ks */
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
+	unsigned long begin = regs->kuap & 0xf0000000;
+	unsigned long end = regs->kuap << 28;
+
 	if (!is_write)
 		return false;
 
-	return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !");
+	return WARN(address < begin || address >= end,
+		    "Bug: write fault blocked by segment registers !");
 }
 
 #endif /* CONFIG_PPC_KUAP */
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f254de956d6a..dbbd22cb80f5 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -95,7 +95,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
 	set_kuap(AMR_KUAP_BLOCKED);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
 		    (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 5b5e39643a27..812e66f31934 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -45,7 +45,11 @@ static inline void allow_user_access(void __user *to, const void __user *from,
 				     unsigned long size) { }
 static inline void prevent_user_access(void __user *to, const void __user *from,
 				       unsigned long size) { }
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; }
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	return false;
+}
 #endif /* CONFIG_PPC_KUAP */
 
 static inline void allow_read_from_user(const void __user *from, unsigned long size)
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1006a427e99c..f2fea603b929 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -46,7 +46,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
 	mtspr(SPRN_MD_AP, MD_APG_KUAP);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
 		    "Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b5047f9b5dec..1baeb045f7f4 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 
 	// Read/write fault in a valid region (the exception table search passed
 	// above), but blocked by KUAP is bad, it can never succeed.
-	if (bad_kuap_fault(regs, is_write))
+	if (bad_kuap_fault(regs, address, is_write))
 		return true;
 
 	// What's left? Kernel fault on user in well defined regions (extable
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v3 4/7] powerpc/kuap: Fix set direction in allow/prevent_user_access()
  2020-01-23 12:59 [PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin() Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 3/7] powerpc/32s: Fix bad_kuap_fault() Christophe Leroy
@ 2020-01-23 12:59 ` Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 5/7] powerpc/32s: Drop NULL addr verification Christophe Leroy
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Christophe Leroy @ 2020-01-23 12:59 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm

__builtin_constant_p() always return 0 for pointers, so on RADIX
we always end up opening both direction (by writing 0 in SPR29):

0000000000000170 <._copy_to_user>:
...
 1b0:	4c 00 01 2c 	isync
 1b4:	39 20 00 00 	li      r9,0
 1b8:	7d 3d 03 a6 	mtspr   29,r9
 1bc:	4c 00 01 2c 	isync
 1c0:	48 00 00 01 	bl      1c0 <._copy_to_user+0x50>
			1c0: R_PPC64_REL24	.__copy_tofrom_user
...
0000000000000220 <._copy_from_user>:
...
 2ac:	4c 00 01 2c 	isync
 2b0:	39 20 00 00 	li      r9,0
 2b4:	7d 3d 03 a6 	mtspr   29,r9
 2b8:	4c 00 01 2c 	isync
 2bc:	7f c5 f3 78 	mr      r5,r30
 2c0:	7f 83 e3 78 	mr      r3,r28
 2c4:	48 00 00 01 	bl      2c4 <._copy_from_user+0xa4>
			2c4: R_PPC64_REL24	.__copy_tofrom_user
...

Use an explicit parameter for direction selection, so that GCC
is able to see it is a constant:

00000000000001b0 <._copy_to_user>:
...
 1f0:	4c 00 01 2c 	isync
 1f4:	3d 20 40 00 	lis     r9,16384
 1f8:	79 29 07 c6 	rldicr  r9,r9,32,31
 1fc:	7d 3d 03 a6 	mtspr   29,r9
 200:	4c 00 01 2c 	isync
 204:	48 00 00 01 	bl      204 <._copy_to_user+0x54>
			204: R_PPC64_REL24	.__copy_tofrom_user
...
0000000000000260 <._copy_from_user>:
...
 2ec:	4c 00 01 2c 	isync
 2f0:	39 20 ff ff 	li      r9,-1
 2f4:	79 29 00 04 	rldicr  r9,r9,0,0
 2f8:	7d 3d 03 a6 	mtspr   29,r9
 2fc:	4c 00 01 2c 	isync
 300:	7f c5 f3 78 	mr      r5,r30
 304:	7f 83 e3 78 	mr      r3,r28
 308:	48 00 00 01 	bl      308 <._copy_from_user+0xa8>
			308: R_PPC64_REL24	.__copy_tofrom_user
...

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v2: no change
v3: no change
---
 arch/powerpc/include/asm/book3s/32/kup.h      | 13 ++++++--
 .../powerpc/include/asm/book3s/64/kup-radix.h | 11 +++----
 arch/powerpc/include/asm/kup.h                | 30 ++++++++++++++-----
 arch/powerpc/include/asm/nohash/32/kup-8xx.h  |  4 +--
 arch/powerpc/include/asm/uaccess.h            |  4 +--
 5 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index d88008c8eb85..d765515bd1c1 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -102,11 +102,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
 	isync();	/* Context sync required after mtsrin() */
 }
 
-static inline void allow_user_access(void __user *to, const void __user *from, u32 size)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      u32 size, unsigned long dir)
 {
 	u32 addr, end;
 
-	if (__builtin_constant_p(to) && to == NULL)
+	BUILD_BUG_ON(!__builtin_constant_p(dir));
+	if (!(dir & KUAP_W))
 		return;
 
 	addr = (__force u32)to;
@@ -119,11 +121,16 @@ static inline void allow_user_access(void __user *to, const void __user *from, u
 	kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end);	/* Clear Ks */
 }
 
-static inline void prevent_user_access(void __user *to, const void __user *from, u32 size)
+static __always_inline void prevent_user_access(void __user *to, const void __user *from,
+						u32 size, unsigned long dir)
 {
 	u32 addr = (__force u32)to;
 	u32 end = min(addr + size, TASK_SIZE);
 
+	BUILD_BUG_ON(!__builtin_constant_p(dir));
+	if (!(dir & KUAP_W))
+		return;
+
 	if (!addr || addr >= TASK_SIZE || !size)
 		return;
 
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index dbbd22cb80f5..f11315306d41 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -77,20 +77,21 @@ static inline void set_kuap(unsigned long value)
 	isync();
 }
 
-static inline void allow_user_access(void __user *to, const void __user *from,
-				     unsigned long size)
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
 {
 	// This is written so we can resolve to a single case at build time
-	if (__builtin_constant_p(to) && to == NULL)
+	BUILD_BUG_ON(!__builtin_constant_p(dir));
+	if (dir == KUAP_R)
 		set_kuap(AMR_KUAP_BLOCK_WRITE);
-	else if (__builtin_constant_p(from) && from == NULL)
+	else if (dir == KUAP_W)
 		set_kuap(AMR_KUAP_BLOCK_READ);
 	else
 		set_kuap(0);
 }
 
 static inline void prevent_user_access(void __user *to, const void __user *from,
-				       unsigned long size)
+				       unsigned long size, unsigned long dir)
 {
 	set_kuap(AMR_KUAP_BLOCKED);
 }
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 812e66f31934..ff57bfcb88f7 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -2,6 +2,10 @@
 #ifndef _ASM_POWERPC_KUP_H_
 #define _ASM_POWERPC_KUP_H_
 
+#define KUAP_R		1
+#define KUAP_W		2
+#define KUAP_RW		(KUAP_R | KUAP_W)
+
 #ifdef CONFIG_PPC64
 #include <asm/book3s/64/kup-radix.h>
 #endif
@@ -42,9 +46,9 @@ void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
 static inline void allow_user_access(void __user *to, const void __user *from,
-				     unsigned long size) { }
+				     unsigned long size, unsigned long dir) { }
 static inline void prevent_user_access(void __user *to, const void __user *from,
-				       unsigned long size) { }
+				       unsigned long size, unsigned long dir) { }
 static inline bool
 bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
@@ -54,24 +58,36 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 
 static inline void allow_read_from_user(const void __user *from, unsigned long size)
 {
-	allow_user_access(NULL, from, size);
+	allow_user_access(NULL, from, size, KUAP_R);
 }
 
 static inline void allow_write_to_user(void __user *to, unsigned long size)
 {
-	allow_user_access(to, NULL, size);
+	allow_user_access(to, NULL, size, KUAP_W);
+}
+
+static inline void allow_read_write_user(void __user *to, const void __user *from,
+					 unsigned long size)
+{
+	allow_user_access(to, from, size, KUAP_RW);
 }
 
 static inline void prevent_read_from_user(const void __user *from, unsigned long size)
 {
-	prevent_user_access(NULL, from, size);
+	prevent_user_access(NULL, from, size, KUAP_R);
 }
 
 static inline void prevent_write_to_user(void __user *to, unsigned long size)
 {
-	prevent_user_access(to, NULL, size);
+	prevent_user_access(to, NULL, size, KUAP_W);
+}
+
+static inline void prevent_read_write_user(void __user *to, const void __user *from,
+					   unsigned long size)
+{
+	prevent_user_access(to, from, size, KUAP_RW);
 }
 
 #endif /* !__ASSEMBLY__ */
 
-#endif /* _ASM_POWERPC_KUP_H_ */
+#endif /* _ASM_POWERPC_KUAP_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index f2fea603b929..1d70c80366fd 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -35,13 +35,13 @@
 #include <asm/reg.h>
 
 static inline void allow_user_access(void __user *to, const void __user *from,
-				     unsigned long size)
+				     unsigned long size, unsigned long dir)
 {
 	mtspr(SPRN_MD_AP, MD_APG_INIT);
 }
 
 static inline void prevent_user_access(void __user *to, const void __user *from,
-				       unsigned long size)
+				       unsigned long size, unsigned long dir)
 {
 	mtspr(SPRN_MD_AP, MD_APG_KUAP);
 }
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index c92fe7fe9692..cafad1960e76 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -313,9 +313,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
 	unsigned long ret;
 
 	barrier_nospec();
-	allow_user_access(to, from, n);
+	allow_read_write_user(to, from, n);
 	ret = __copy_tofrom_user(to, from, n);
-	prevent_user_access(to, from, n);
+	prevent_read_write_user(to, from, n);
 	return ret;
 }
 #endif /* __powerpc64__ */
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v3 5/7] powerpc/32s: Drop NULL addr verification
  2020-01-23 12:59 [PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin() Christophe Leroy
                   ` (2 preceding siblings ...)
  2020-01-23 12:59 ` [PATCH v3 4/7] powerpc/kuap: Fix set direction in allow/prevent_user_access() Christophe Leroy
@ 2020-01-23 12:59 ` Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 6/7] powerpc/32s: Prepare allow_user_access() for user_access_begin() Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 7/7] powerpc: Implement user_access_begin and friends Christophe Leroy
  5 siblings, 0 replies; 14+ messages in thread
From: Christophe Leroy @ 2020-01-23 12:59 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm

NULL addr is a user address. Don't waste time checking it. If
someone tries to access it, it will SIGFAULT the same way as for
address 1, so no need to make it special.

The special case is when not doing a write, in that case we want
to drop the entire function. This is now handled by 'dir' param
and not by the nulity of 'to' anymore.

Also make beginning of prevent_user_access() similar
to beginning of allow_user_access(), and tell the compiler
that writing in kernel space or with a 0 length is unlikely

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v2: no change
v3: no change
---
 arch/powerpc/include/asm/book3s/32/kup.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index d765515bd1c1..3c1798e56b55 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -113,7 +113,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 
 	addr = (__force u32)to;
 
-	if (!addr || addr >= TASK_SIZE || !size)
+	if (unlikely(addr >= TASK_SIZE || !size))
 		return;
 
 	end = min(addr + size, TASK_SIZE);
@@ -124,16 +124,18 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 static __always_inline void prevent_user_access(void __user *to, const void __user *from,
 						u32 size, unsigned long dir)
 {
-	u32 addr = (__force u32)to;
-	u32 end = min(addr + size, TASK_SIZE);
+	u32 addr, end;
 
 	BUILD_BUG_ON(!__builtin_constant_p(dir));
 	if (!(dir & KUAP_W))
 		return;
 
-	if (!addr || addr >= TASK_SIZE || !size)
+	addr = (__force u32)to;
+
+	if (unlikely(addr >= TASK_SIZE || !size))
 		return;
 
+	end = min(addr + size, TASK_SIZE);
 	current->thread.kuap = 0;
 	kuap_update_sr(mfsrin(addr) | SR_KS, addr, end);	/* set Ks */
 }
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v3 6/7] powerpc/32s: Prepare allow_user_access() for user_access_begin()
  2020-01-23 12:59 [PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin() Christophe Leroy
                   ` (3 preceding siblings ...)
  2020-01-23 12:59 ` [PATCH v3 5/7] powerpc/32s: Drop NULL addr verification Christophe Leroy
@ 2020-01-23 12:59 ` Christophe Leroy
  2020-01-23 12:59 ` [PATCH v3 7/7] powerpc: Implement user_access_begin and friends Christophe Leroy
  5 siblings, 0 replies; 14+ messages in thread
From: Christophe Leroy @ 2020-01-23 12:59 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm

In preparation of implementing user_access_begin and friends
on powerpc, allow_user_access() need to be prepared for
user_access_begin()

user_access_end() doesn't provide the address and size which
were passed to user_access_begin(), required by prevent_user_access()
to know which segment to modify. But user_access_end() takes an
opaque value returned by user_access_begin().

Make allow_user_access() return the value it writes to current->kuap.
This will allow user_access_end() to recalculate the segment range
without having to read current->kuap.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v3: Replaces the patch "Prepare prevent_user_access() for user_access_end()"
---
 arch/powerpc/include/asm/book3s/32/kup.h       | 15 ++++++++++-----
 arch/powerpc/include/asm/book3s/64/kup-radix.h |  7 +++++--
 arch/powerpc/include/asm/kup.h                 |  8 ++++++--
 arch/powerpc/include/asm/nohash/32/kup-8xx.h   |  6 ++++--
 4 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 3c1798e56b55..128dcbf3a19d 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -102,23 +102,28 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
 	isync();	/* Context sync required after mtsrin() */
 }
 
-static __always_inline void allow_user_access(void __user *to, const void __user *from,
-					      u32 size, unsigned long dir)
+/* Make sure we never return 0. We only use top and bottom 4 bits */
+static __always_inline unsigned long
+allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir)
 {
 	u32 addr, end;
+	unsigned long kuap;
 
 	BUILD_BUG_ON(!__builtin_constant_p(dir));
 	if (!(dir & KUAP_W))
-		return;
+		return 0x100;
 
 	addr = (__force u32)to;
 
 	if (unlikely(addr >= TASK_SIZE || !size))
-		return;
+		return 0x100;
 
 	end = min(addr + size, TASK_SIZE);
-	current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
+	kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
+	current->thread.kuap = kuap;
 	kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end);	/* Clear Ks */
+
+	return kuap;
 }
 
 static __always_inline void prevent_user_access(void __user *to, const void __user *from,
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f11315306d41..183f0c87017b 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -77,8 +77,9 @@ static inline void set_kuap(unsigned long value)
 	isync();
 }
 
-static __always_inline void allow_user_access(void __user *to, const void __user *from,
-					      unsigned long size, unsigned long dir)
+static __always_inline unsigned long
+allow_user_access(void __user *to, const void __user *from,
+		  unsigned long size, unsigned long dir)
 {
 	// This is written so we can resolve to a single case at build time
 	BUILD_BUG_ON(!__builtin_constant_p(dir));
@@ -88,6 +89,8 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 		set_kuap(AMR_KUAP_BLOCK_READ);
 	else
 		set_kuap(0);
+
+	return 1;
 }
 
 static inline void prevent_user_access(void __user *to, const void __user *from,
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index ff57bfcb88f7..691fec5afd4a 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -45,8 +45,12 @@ static inline void setup_kuep(bool disabled) { }
 void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
-static inline void allow_user_access(void __user *to, const void __user *from,
-				     unsigned long size, unsigned long dir) { }
+static inline unsigned long allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
+{
+	return 1;
+}
+
 static inline void prevent_user_access(void __user *to, const void __user *from,
 				       unsigned long size, unsigned long dir) { }
 static inline bool
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1d70c80366fd..ee673d3c0ab6 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -34,10 +34,12 @@
 
 #include <asm/reg.h>
 
-static inline void allow_user_access(void __user *to, const void __user *from,
-				     unsigned long size, unsigned long dir)
+static inline unsigned long allow_user_access(void __user *to, const void __user *from,
+					      unsigned long size, unsigned long dir)
 {
 	mtspr(SPRN_MD_AP, MD_APG_INIT);
+
+	return 1;
 }
 
 static inline void prevent_user_access(void __user *to, const void __user *from,
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v3 7/7] powerpc: Implement user_access_begin and friends
  2020-01-23 12:59 [PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin() Christophe Leroy
                   ` (4 preceding siblings ...)
  2020-01-23 12:59 ` [PATCH v3 6/7] powerpc/32s: Prepare allow_user_access() for user_access_begin() Christophe Leroy
@ 2020-01-23 12:59 ` Christophe Leroy
  5 siblings, 0 replies; 14+ messages in thread
From: Christophe Leroy @ 2020-01-23 12:59 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm

Today, when a function like strncpy_from_user() is called,
the userspace access protection is de-activated and re-activated
for every word read.

By implementing user_access_begin and friends, the protection
is de-activated at the beginning of the copy and re-activated at the
end.

Implement user_access_begin(), user_access_end() and
unsafe_get_user(), unsafe_put_user() and unsafe_copy_to_user()

For the time being, we keep user_access_save() and
user_access_restore() as nops.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v2: no change
v3: adapted to the new format of user_access_begin/end()
---
 arch/powerpc/include/asm/uaccess.h | 100 ++++++++++++++++++++++++++---
 1 file changed, 90 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index cafad1960e76..30204e80df1b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -91,9 +91,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size,
 	__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
 #define __get_user(x, ptr) \
-	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+	__get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
 #define __put_user(x, ptr) \
-	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true)
+
+#define __get_user_allowed(x, ptr) \
+	__get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
+#define __put_user_allowed(x, ptr) \
+	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false)
 
 #define __get_user_inatomic(x, ptr) \
 	__get_user_nosleep((x), (ptr), sizeof(*(ptr)))
@@ -138,10 +143,9 @@ extern long __put_user_bad(void);
 		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
 #endif /* __powerpc64__ */
 
-#define __put_user_size(x, ptr, size, retval)			\
+#define __put_user_size_allowed(x, ptr, size, retval)		\
 do {								\
 	retval = 0;						\
-	allow_write_to_user(ptr, size);				\
 	switch (size) {						\
 	  case 1: __put_user_asm(x, ptr, retval, "stb"); break;	\
 	  case 2: __put_user_asm(x, ptr, retval, "sth"); break;	\
@@ -149,17 +153,26 @@ do {								\
 	  case 8: __put_user_asm2(x, ptr, retval); break;	\
 	  default: __put_user_bad();				\
 	}							\
+} while (0)
+
+#define __put_user_size(x, ptr, size, retval)			\
+do {								\
+	allow_write_to_user(ptr, size);				\
+	__put_user_size_allowed(x, ptr, size, retval);		\
 	prevent_write_to_user(ptr, size);			\
 } while (0)
 
-#define __put_user_nocheck(x, ptr, size)			\
+#define __put_user_nocheck(x, ptr, size, allow)			\
 ({								\
 	long __pu_err;						\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
 	if (!is_kernel_addr((unsigned long)__pu_addr))		\
 		might_fault();					\
 	__chk_user_ptr(ptr);					\
-	__put_user_size((x), __pu_addr, (size), __pu_err);	\
+	if (allow)								\
+		__put_user_size((x), __pu_addr, (size), __pu_err);		\
+	else									\
+		__put_user_size_allowed((x), __pu_addr, (size), __pu_err);	\
 	__pu_err;						\
 })
 
@@ -236,13 +249,12 @@ extern long __get_user_bad(void);
 		: "b" (addr), "i" (-EFAULT), "0" (err))
 #endif /* __powerpc64__ */
 
-#define __get_user_size(x, ptr, size, retval)			\
+#define __get_user_size_allowed(x, ptr, size, retval)		\
 do {								\
 	retval = 0;						\
 	__chk_user_ptr(ptr);					\
 	if (size > sizeof(x))					\
 		(x) = __get_user_bad();				\
-	allow_read_from_user(ptr, size);			\
 	switch (size) {						\
 	case 1: __get_user_asm(x, ptr, retval, "lbz"); break;	\
 	case 2: __get_user_asm(x, ptr, retval, "lhz"); break;	\
@@ -250,6 +262,12 @@ do {								\
 	case 8: __get_user_asm2(x, ptr, retval);  break;	\
 	default: (x) = __get_user_bad();			\
 	}							\
+} while (0)
+
+#define __get_user_size(x, ptr, size, retval)			\
+do {								\
+	allow_read_from_user(ptr, size);			\
+	__get_user_size_allowed(x, ptr, size, retval);		\
 	prevent_read_from_user(ptr, size);			\
 } while (0)
 
@@ -260,7 +278,7 @@ do {								\
 #define __long_type(x) \
 	__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 
-#define __get_user_nocheck(x, ptr, size)			\
+#define __get_user_nocheck(x, ptr, size, allow)			\
 ({								\
 	long __gu_err;						\
 	__long_type(*(ptr)) __gu_val;				\
@@ -269,7 +287,10 @@ do {								\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
 		might_fault();					\
 	barrier_nospec();					\
-	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
+	if (allow)								\
+		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);		\
+	else									\
+		__get_user_size_allowed(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
@@ -387,6 +408,34 @@ static inline unsigned long raw_copy_to_user(void __user *to,
 	return ret;
 }
 
+static inline unsigned long
+raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n)
+{
+	unsigned long ret;
+	if (__builtin_constant_p(n) && (n) <= 8) {
+		ret = 1;
+
+		switch (n) {
+		case 1:
+			__put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret);
+			break;
+		case 2:
+			__put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret);
+			break;
+		case 4:
+			__put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret);
+			break;
+		case 8:
+			__put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret);
+			break;
+		}
+		if (ret == 0)
+			return 0;
+	}
+
+	return __copy_tofrom_user(to, (__force const void __user *)from, n);
+}
+
 static __always_inline unsigned long __must_check
 copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
 {
@@ -428,4 +477,35 @@ extern long __copy_from_user_flushcache(void *dst, const void __user *src,
 extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
 			   size_t len);
 
+static __must_check inline unsigned long
+user_access_begin(const void __user *ptr, size_t len, bool write)
+{
+	if (unlikely(!access_ok(ptr, len)))
+		return 0;
+	return allow_user_access((void __user *)ptr, ptr, len, write ? KUAP_RW : KUAP_R);
+}
+#define user_access_begin	user_access_begin
+
+static inline void user_access_end(unsigned long key)
+{
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_32)) {
+		void __user *ptr = (__force void __user *)(key & 0xf0000000);
+		u32 size = (key << 28) - (key & 0xf0000000);
+
+		prevent_user_access(ptr, ptr, size, key & 0xf000000f ? KUAP_RW : KUAP_R);
+	} else {
+		prevent_user_access(NULL, NULL, ~0UL, KUAP_RW);
+	}
+}
+#define user_access_end		user_access_end
+
+static inline unsigned long user_access_save(void) { return 0UL; }
+static inline void user_access_restore(unsigned long flags) { }
+
+#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
+#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
+#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e)
+#define unsafe_copy_to_user(d, s, l, e) \
+	unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e)
+
 #endif	/* _ARCH_POWERPC_UACCESS_H */
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not
  2020-01-23 12:59 ` [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not Christophe Leroy
@ 2020-01-23 13:11   ` Jani Nikula
  2020-01-23 18:02   ` Linus Torvalds
  2020-01-25 14:40   ` kbuild test robot
  2 siblings, 0 replies; 14+ messages in thread
From: Jani Nikula @ 2020-01-23 13:11 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, Linus Torvalds, Alexander Viro, Andrew Morton,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin
  Cc: linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm, dri-devel, x86

On Thu, 23 Jan 2020, Christophe Leroy <christophe.leroy@c-s.fr> wrote:
> On 32 bits powerPC (book3s/32), only write accesses to user are
> protected and there is no point spending time on unlocking for reads.
>
> On 64 bits powerpc (book3s/64 at least), access can be granted
> read only, write only or read/write.
>
> Add an argument to user_access_begin() to tell when it's for write and
> return an opaque key that will be used by user_access_end() to know
> what was done by user_access_begin().

IMHO an opaque key is a prime example of a case where the use of an
opaque typedef is warranted. Nobody needs to know or care it's
specifically an unsigned long.

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Graphics Center

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not
  2020-01-23 12:59 ` [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not Christophe Leroy
  2020-01-23 13:11   ` Jani Nikula
@ 2020-01-23 18:02   ` Linus Torvalds
  2020-01-23 19:47     ` christophe leroy
  2020-01-25  6:17     ` Tony Luck
  2020-01-25 14:40   ` kbuild test robot
  2 siblings, 2 replies; 14+ messages in thread
From: Linus Torvalds @ 2020-01-23 18:02 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Alexander Viro, Andrew Morton, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, H. Peter Anvin, Jani Nikula,
	Linux Kernel Mailing List, linuxppc-dev, linux-fsdevel, Linux-MM,
	dri-devel, the arch/x86 maintainers

On Thu, Jan 23, 2020 at 4:59 AM Christophe Leroy
<christophe.leroy@c-s.fr> wrote:
>
> On 32 bits powerPC (book3s/32), only write accesses to user are
> protected and there is no point spending time on unlocking for reads.

Honestly, I'm starting to think that 32-bit ppc just needs to look
more like everybody else, than make these changes.

We used to have a read/write argument to the old "verify_area()" and
"access_ok()" model, and it was a mistake. It was due to odd i386 user
access issues. We got rid of it. I'm not convinced this is any better
- it looks very similar and for odd ppc access issues.

But if we really do want to do this, then:

> Add an argument to user_access_begin() to tell when it's for write and
> return an opaque key that will be used by user_access_end() to know
> what was done by user_access_begin().

You should make it more opaque than "unsigned long".

Also, it shouldn't be a "is this a write". What if it's a read _and_ a
write? Only a write? Only a read?

                    Linus

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not
  2020-01-23 18:02   ` Linus Torvalds
@ 2020-01-23 19:47     ` christophe leroy
  2020-01-23 19:57       ` Linus Torvalds
  2020-01-25  6:17     ` Tony Luck
  1 sibling, 1 reply; 14+ messages in thread
From: christophe leroy @ 2020-01-23 19:47 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Alexander Viro, Andrew Morton, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, H. Peter Anvin, Jani Nikula,
	Linux Kernel Mailing List, linuxppc-dev, linux-fsdevel, Linux-MM,
	dri-devel, the arch/x86 maintainers



Le 23/01/2020 à 19:02, Linus Torvalds a écrit :
> On Thu, Jan 23, 2020 at 4:59 AM Christophe Leroy
> <christophe.leroy@c-s.fr> wrote:
>>
>> On 32 bits powerPC (book3s/32), only write accesses to user are
>> protected and there is no point spending time on unlocking for reads.
> 
> Honestly, I'm starting to think that 32-bit ppc just needs to look
> more like everybody else, than make these changes.

Well, beside ppc32, I was also seen it as an opportunity for the modern 
ppc64. On it, you can unlock either read or write or both. And this is 
what is done for get_user() / put_user() and friends: unlock only reads 
for get_user() and only writes for put_user().

Could also be a compromise between performance and security: keeping 
reads allowed at all time and only protect against writes on modern 
architectures which support it like ppc64.

> 
> We used to have a read/write argument to the old "verify_area()" and
> "access_ok()" model, and it was a mistake. It was due to odd i386 user
> access issues. We got rid of it. I'm not convinced this is any better
> - it looks very similar and for odd ppc access issues.

I'm going to leave it aside, at least for the time being, and do it as a 
second step later after evaluating the real performance impact. I'll 
respin tomorrow in that way.

> 
> But if we really do want to do this, then:

Indeed I took the idea from a discussion in last Octobre (Subject: 
"book3s/32 KUAP (was Re: [PATCH] Convert filldir[64]() from __put_user() 
to unsafe_put_user())" )

https://lore.kernel.org/lkml/87h84avffi.fsf@mpe.ellerman.id.au/


> 
>> Add an argument to user_access_begin() to tell when it's for write and
>> return an opaque key that will be used by user_access_end() to know
>> what was done by user_access_begin().
> 
> You should make it more opaque than "unsigned long".
> 
> Also, it shouldn't be a "is this a write". What if it's a read _and_ a
> write? Only a write? Only a read?

Indeed that was more: does it includes a write. It's either RO or RW

Christophe

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not
  2020-01-23 19:47     ` christophe leroy
@ 2020-01-23 19:57       ` Linus Torvalds
  2020-01-24  2:03         ` hpa
  0 siblings, 1 reply; 14+ messages in thread
From: Linus Torvalds @ 2020-01-23 19:57 UTC (permalink / raw)
  To: christophe leroy
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Alexander Viro, Andrew Morton, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, H. Peter Anvin, Jani Nikula,
	Linux Kernel Mailing List, linuxppc-dev, linux-fsdevel, Linux-MM,
	dri-devel, the arch/x86 maintainers

On Thu, Jan 23, 2020 at 11:47 AM christophe leroy
<christophe.leroy@c-s.fr> wrote:
>
> I'm going to leave it aside, at least for the time being, and do it as a
> second step later after evaluating the real performance impact. I'll
> respin tomorrow in that way.

Ok, good.

From a "narrow the access window type" standpoint it does seem to be a
good idea to specify what kind of user accesses will be done, so I
don't hate the idea, it's more that I'm not convinced it matters
enough.

On x86, we have made the rule that user_access_begin/end() can contain
_very_ few operations, and objtool really does enforce that. With
objtool and KASAN, you really end up with very small ranges of
user_access_begin/end().

And since we actually verify it statically on x86-64, I would say that
the added benefit of narrowing by access type is fairly small. We're
not going to have complicated code in that user access region, at
least in generic code.

> > Also, it shouldn't be a "is this a write". What if it's a read _and_ a
> > write? Only a write? Only a read?
>
> Indeed that was more: does it includes a write. It's either RO or RW

I would expect that most actual users would be RO or WO, so it's a bit
odd to have those choices.

Of course, often writing ends up requiring read permissions anyway if
the architecture has problems with alignment handling or similar, but
still... The real RW case does exist conceptually (we have
"copy_in_user()", after all), but still feels like it shouldn't be
seen as the only _interface_ choice.

IOW, an architecture may decide to turn WO into RW because of
architecture limitations (or, like x86 and arm, ignore the whole
RO/RW/WO _entirely_ because there's just a single "allow user space
accesses" flag), but on an interface layer if we add this flag, I
really think it should be an explicit "read or write or both".

So thus my "let's try to avoid doing it in the first place, but if we
_do_ do this, then do it right" plea.

                 Linus

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not
  2020-01-23 19:57       ` Linus Torvalds
@ 2020-01-24  2:03         ` hpa
  0 siblings, 0 replies; 14+ messages in thread
From: hpa @ 2020-01-24  2:03 UTC (permalink / raw)
  To: Linus Torvalds, christophe leroy
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	Alexander Viro, Andrew Morton, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Jani Nikula, Linux Kernel Mailing List,
	linuxppc-dev, linux-fsdevel, Linux-MM, dri-devel,
	the arch/x86 maintainers

On January 23, 2020 11:57:57 AM PST, Linus Torvalds <torvalds@linux-foundation.org> wrote:
>On Thu, Jan 23, 2020 at 11:47 AM christophe leroy
><christophe.leroy@c-s.fr> wrote:
>>
>> I'm going to leave it aside, at least for the time being, and do it
>as a
>> second step later after evaluating the real performance impact. I'll
>> respin tomorrow in that way.
>
>Ok, good.
>
>From a "narrow the access window type" standpoint it does seem to be a
>good idea to specify what kind of user accesses will be done, so I
>don't hate the idea, it's more that I'm not convinced it matters
>enough.
>
>On x86, we have made the rule that user_access_begin/end() can contain
>_very_ few operations, and objtool really does enforce that. With
>objtool and KASAN, you really end up with very small ranges of
>user_access_begin/end().
>
>And since we actually verify it statically on x86-64, I would say that
>the added benefit of narrowing by access type is fairly small. We're
>not going to have complicated code in that user access region, at
>least in generic code.
>
>> > Also, it shouldn't be a "is this a write". What if it's a read
>_and_ a
>> > write? Only a write? Only a read?
>>
>> Indeed that was more: does it includes a write. It's either RO or RW
>
>I would expect that most actual users would be RO or WO, so it's a bit
>odd to have those choices.
>
>Of course, often writing ends up requiring read permissions anyway if
>the architecture has problems with alignment handling or similar, but
>still... The real RW case does exist conceptually (we have
>"copy_in_user()", after all), but still feels like it shouldn't be
>seen as the only _interface_ choice.
>
>IOW, an architecture may decide to turn WO into RW because of
>architecture limitations (or, like x86 and arm, ignore the whole
>RO/RW/WO _entirely_ because there's just a single "allow user space
>accesses" flag), but on an interface layer if we add this flag, I
>really think it should be an explicit "read or write or both".
>
>So thus my "let's try to avoid doing it in the first place, but if we
>_do_ do this, then do it right" plea.
>
>                 Linus

I'm wondering if we should make it a static part of the API instead of a variable.

I have *deep* concern with carrying state in a "key" variable: it's a direct attack vector for a crowbar attack, especially since it is by definition live inside a user access region.

One major reason x86 restricts the regions like this is to minimize the amount of unconstrained state: we don't save and restore the state around, but enter and exit unconditionally, which means that a leaked state will end up having a limited lifespan. Nor is there any state inside the user access region which could be corrupted to leave the region open.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not
  2020-01-23 18:02   ` Linus Torvalds
  2020-01-23 19:47     ` christophe leroy
@ 2020-01-25  6:17     ` Tony Luck
  1 sibling, 0 replies; 14+ messages in thread
From: Tony Luck @ 2020-01-25  6:17 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, Alexander Viro, Andrew Morton, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, H. Peter Anvin, Jani Nikula,
	Linux Kernel Mailing List, linuxppc-dev, linux-fsdevel, Linux-MM,
	dri-devel, the arch/x86 maintainers

On Thu, Jan 23, 2020 at 10:03 AM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> We used to have a read/write argument to the old "verify_area()" and
> "access_ok()" model, and it was a mistake. It was due to odd i386 user
> access issues. We got rid of it. I'm not convinced this is any better
> - it looks very similar and for odd ppc access issues.

If the mode (read or write) were made visible to the trap handler, I'd
find that useful for machine check recovery.  If I'm in the middle of a
copy_from_user() and I get a machine check reading poison from a
user address ... then I could try to recover in the same way as for the
user accessing the poison (offline the page, SIGBUS the task). But if
the poison is in kernel memory and we are doing a copy_to_user(), then
we are hosed (or would need some more complex recovery plan).

[Note that we only get recoverable machine checks on loads... writes
are posted, so if something goes wrong it isn't synchronous with the store
instruction that initiated it]

-Tony

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not
  2020-01-23 12:59 ` [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not Christophe Leroy
  2020-01-23 13:11   ` Jani Nikula
  2020-01-23 18:02   ` Linus Torvalds
@ 2020-01-25 14:40   ` kbuild test robot
  2 siblings, 0 replies; 14+ messages in thread
From: kbuild test robot @ 2020-01-25 14:40 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: kbuild-all, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, Linus Torvalds, Alexander Viro, Andrew Morton,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	Jani Nikula, linux-kernel, linuxppc-dev, linux-fsdevel, linux-mm,
	dri-devel, x86

[-- Attachment #1: Type: text/plain, Size: 6360 bytes --]

Hi Christophe,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on tip/x86/core drm-intel/for-linux-next v5.5-rc7]
[cannot apply to linus/master next-20200124]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Christophe-Leroy/fs-readdir-Fix-filldir-and-filldir64-use-of-user_access_begin/20200125-070606
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: x86_64-randconfig-s0-20200125 (attached as .config)
compiler: gcc-7 (Debian 7.5.0-3) 7.5.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   kernel/exit.c: In function '__do_sys_waitid':
>> kernel/exit.c:1567:53: error: macro "user_access_begin" passed 3 arguments, but takes just 2
     key = user_access_begin(infop, sizeof(*infop), true);
                                                        ^
>> kernel/exit.c:1567:6: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
     key = user_access_begin(infop, sizeof(*infop), true);
         ^
   kernel/exit.c: In function '__do_compat_sys_waitid':
   kernel/exit.c:1697:53: error: macro "user_access_begin" passed 3 arguments, but takes just 2
     key = user_access_begin(infop, sizeof(*infop), true);
                                                        ^
   kernel/exit.c:1697:6: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
     key = user_access_begin(infop, sizeof(*infop), true);
         ^
--
   kernel/compat.c: In function 'compat_get_bitmap':
>> kernel/compat.c:267:55: error: macro "user_access_begin" passed 3 arguments, but takes just 2
     key = user_access_begin(umask, bitmap_size / 8, false);
                                                          ^
>> kernel/compat.c:267:6: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
     key = user_access_begin(umask, bitmap_size / 8, false);
         ^
   kernel/compat.c: In function 'compat_put_bitmap':
   kernel/compat.c:298:54: error: macro "user_access_begin" passed 3 arguments, but takes just 2
     key = user_access_begin(umask, bitmap_size / 8, true);
                                                         ^
   kernel/compat.c:298:6: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
     key = user_access_begin(umask, bitmap_size / 8, true);
         ^
--
   fs/readdir.c: In function 'filldir':
>> fs/readdir.c:242:58: error: macro "user_access_begin" passed 3 arguments, but takes just 2
     key = user_access_begin(prev, reclen + prev_reclen, true);
                                                             ^
>> fs/readdir.c:242:6: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
     key = user_access_begin(prev, reclen + prev_reclen, true);
         ^
   fs/readdir.c: In function 'filldir64':
   fs/readdir.c:329:58: error: macro "user_access_begin" passed 3 arguments, but takes just 2
     key = user_access_begin(prev, reclen + prev_reclen, true);
                                                             ^
   fs/readdir.c:329:6: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
     key = user_access_begin(prev, reclen + prev_reclen, true);
         ^
--
   lib/usercopy.c: In function 'check_zeroed_user':
>> lib/usercopy.c:62:43: error: macro "user_access_begin" passed 3 arguments, but takes just 2
     key = user_access_begin(from, size, false);
                                              ^
>> lib/usercopy.c:62:6: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
     key = user_access_begin(from, size, false);
         ^
--
   lib/strncpy_from_user.c: In function 'strncpy_from_user':
>> lib/strncpy_from_user.c:120:42: error: macro "user_access_begin" passed 3 arguments, but takes just 2
      key = user_access_begin(src, max, false);
                                             ^
>> lib/strncpy_from_user.c:120:7: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
      key = user_access_begin(src, max, false);
          ^
--
   lib/strnlen_user.c: In function 'strnlen_user':
>> lib/strnlen_user.c:113:42: error: macro "user_access_begin" passed 3 arguments, but takes just 2
      key = user_access_begin(str, max, false);
                                             ^
>> lib/strnlen_user.c:113:7: warning: assignment makes integer from pointer without a cast [-Wint-conversion]
      key = user_access_begin(str, max, false);
          ^

vim +/user_access_begin +1567 kernel/exit.c

  1548	
  1549	SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
  1550			infop, int, options, struct rusage __user *, ru)
  1551	{
  1552		struct rusage r;
  1553		struct waitid_info info = {.status = 0};
  1554		long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
  1555		int signo = 0;
  1556		unsigned long key;
  1557	
  1558		if (err > 0) {
  1559			signo = SIGCHLD;
  1560			err = 0;
  1561			if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
  1562				return -EFAULT;
  1563		}
  1564		if (!infop)
  1565			return err;
  1566	
> 1567		key = user_access_begin(infop, sizeof(*infop), true);
  1568		if (!key)
  1569			return -EFAULT;
  1570	
  1571		unsafe_put_user(signo, &infop->si_signo, Efault);
  1572		unsafe_put_user(0, &infop->si_errno, Efault);
  1573		unsafe_put_user(info.cause, &infop->si_code, Efault);
  1574		unsafe_put_user(info.pid, &infop->si_pid, Efault);
  1575		unsafe_put_user(info.uid, &infop->si_uid, Efault);
  1576		unsafe_put_user(info.status, &infop->si_status, Efault);
  1577		user_access_end(key);
  1578		return err;
  1579	Efault:
  1580		user_access_end(key);
  1581		return -EFAULT;
  1582	}
  1583	

---
0-DAY kernel test infrastructure                 Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 38991 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-01-25 14:41 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-23 12:59 [PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin() Christophe Leroy
2020-01-23 12:59 ` [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not Christophe Leroy
2020-01-23 13:11   ` Jani Nikula
2020-01-23 18:02   ` Linus Torvalds
2020-01-23 19:47     ` christophe leroy
2020-01-23 19:57       ` Linus Torvalds
2020-01-24  2:03         ` hpa
2020-01-25  6:17     ` Tony Luck
2020-01-25 14:40   ` kbuild test robot
2020-01-23 12:59 ` [PATCH v3 3/7] powerpc/32s: Fix bad_kuap_fault() Christophe Leroy
2020-01-23 12:59 ` [PATCH v3 4/7] powerpc/kuap: Fix set direction in allow/prevent_user_access() Christophe Leroy
2020-01-23 12:59 ` [PATCH v3 5/7] powerpc/32s: Drop NULL addr verification Christophe Leroy
2020-01-23 12:59 ` [PATCH v3 6/7] powerpc/32s: Prepare allow_user_access() for user_access_begin() Christophe Leroy
2020-01-23 12:59 ` [PATCH v3 7/7] powerpc: Implement user_access_begin and friends Christophe Leroy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).