linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* remove set_fs calls from the exec and coredump code v2
@ 2020-04-14  7:01 Christoph Hellwig
  2020-04-14  7:01 ` [PATCH 1/8] powerpc/spufs: simplify spufs core dumping Christoph Hellwig
                   ` (8 more replies)
  0 siblings, 9 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

Hi all,

this series gets rid of playing with the address limit in the exec and
coredump code.  Most of this was fairly trivial, the biggest changes are
those to the spufs coredump code.

Changes since v1:
 - properly spell NUL
 - properly handle the compat siginfo case in ELF coredumps

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 1/8] powerpc/spufs: simplify spufs core dumping
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
@ 2020-04-14  7:01 ` Christoph Hellwig
  2020-04-14  7:01 ` [PATCH 2/8] signal: clean up __copy_siginfo_to_user32 Christoph Hellwig
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

Replace the coredump ->read method with a ->dump method that must call
dump_emit itself.  That way we avoid a buffer allocation an messing with
set_fs() to call into code that is intended to deal with user buffers.
For the ->get case we can now use a small on-stack buffer and avoid
memory allocations as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Jeremy Kerr <jk@ozlabs.org>
---
 arch/powerpc/platforms/cell/spufs/coredump.c |  87 ++----
 arch/powerpc/platforms/cell/spufs/file.c     | 273 ++++++++++---------
 arch/powerpc/platforms/cell/spufs/spufs.h    |   3 +-
 3 files changed, 170 insertions(+), 193 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 8b3296b62f65..3b75e8f60609 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -21,22 +21,6 @@
 
 #include "spufs.h"
 
-static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
-				size_t size, loff_t *off)
-{
-	u64 data;
-	int ret;
-
-	if (spufs_coredump_read[num].read)
-		return spufs_coredump_read[num].read(ctx, buffer, size, off);
-
-	data = spufs_coredump_read[num].get(ctx);
-	ret = snprintf(buffer, size, "0x%.16llx", data);
-	if (ret >= size)
-		return size;
-	return ++ret; /* count trailing NULL */
-}
-
 static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
 {
 	int i, sz, total = 0;
@@ -118,58 +102,43 @@ int spufs_coredump_extra_notes_size(void)
 static int spufs_arch_write_note(struct spu_context *ctx, int i,
 				  struct coredump_params *cprm, int dfd)
 {
-	loff_t pos = 0;
-	int sz, rc, total = 0;
-	const int bufsz = PAGE_SIZE;
-	char *name;
-	char fullname[80], *buf;
+	size_t sz = spufs_coredump_read[i].size;
+	char fullname[80];
 	struct elf_note en;
-	size_t skip;
-
-	buf = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
+	size_t ret;
 
-	name = spufs_coredump_read[i].name;
-	sz = spufs_coredump_read[i].size;
-
-	sprintf(fullname, "SPU/%d/%s", dfd, name);
+	sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name);
 	en.n_namesz = strlen(fullname) + 1;
 	en.n_descsz = sz;
 	en.n_type = NT_SPU;
 
 	if (!dump_emit(cprm, &en, sizeof(en)))
-		goto Eio;
-
+		return -EIO;
 	if (!dump_emit(cprm, fullname, en.n_namesz))
-		goto Eio;
-
+		return -EIO;
 	if (!dump_align(cprm, 4))
-		goto Eio;
-
-	do {
-		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
-		if (rc > 0) {
-			if (!dump_emit(cprm, buf, rc))
-				goto Eio;
-			total += rc;
-		}
-	} while (rc == bufsz && total < sz);
-
-	if (rc < 0)
-		goto out;
-
-	skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
-	if (!dump_skip(cprm, skip))
-		goto Eio;
-
-	rc = 0;
-out:
-	free_page((unsigned long)buf);
-	return rc;
-Eio:
-	free_page((unsigned long)buf);
-	return -EIO;
+		return -EIO;
+
+	if (spufs_coredump_read[i].dump) {
+		ret = spufs_coredump_read[i].dump(ctx, cprm);
+		if (ret < 0)
+			return ret;
+	} else {
+		char buf[32];
+
+		ret = snprintf(buf, sizeof(buf), "0x%.16llx",
+			       spufs_coredump_read[i].get(ctx));
+		if (ret >= sizeof(buf))
+			return sizeof(buf);
+
+		/* count trailing the NULL: */
+		if (!dump_emit(cprm, buf, ret + 1))
+			return -EIO;
+	}
+
+	if (!dump_skip(cprm, roundup(cprm->pos - ret + sz, 4) - cprm->pos))
+		return -EIO;
+	return 0;
 }
 
 int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index c0f950a3f4e1..0f8c3d692af0 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -9,6 +9,7 @@
 
 #undef DEBUG
 
+#include <linux/coredump.h>
 #include <linux/fs.h>
 #include <linux/ioctl.h>
 #include <linux/export.h>
@@ -129,6 +130,14 @@ static ssize_t spufs_attr_write(struct file *file, const char __user *buf,
 	return ret;
 }
 
+static ssize_t spufs_dump_emit(struct coredump_params *cprm, void *buf,
+		size_t size)
+{
+	if (!dump_emit(cprm, buf, size))
+		return -EIO;
+	return size;
+}
+
 #define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)	\
 static int __fops ## _open(struct inode *inode, struct file *file)	\
 {									\
@@ -172,12 +181,9 @@ spufs_mem_release(struct inode *inode, struct file *file)
 }
 
 static ssize_t
-__spufs_mem_read(struct spu_context *ctx, char __user *buffer,
-			size_t size, loff_t *pos)
+spufs_mem_dump(struct spu_context *ctx, struct coredump_params *cprm)
 {
-	char *local_store = ctx->ops->get_ls(ctx);
-	return simple_read_from_buffer(buffer, size, pos, local_store,
-					LS_SIZE);
+	return spufs_dump_emit(cprm, ctx->ops->get_ls(ctx), LS_SIZE);
 }
 
 static ssize_t
@@ -190,7 +196,8 @@ spufs_mem_read(struct file *file, char __user *buffer,
 	ret = spu_acquire(ctx);
 	if (ret)
 		return ret;
-	ret = __spufs_mem_read(ctx, buffer, size, pos);
+	ret = simple_read_from_buffer(buffer, size, pos, ctx->ops->get_ls(ctx),
+				      LS_SIZE);
 	spu_release(ctx);
 
 	return ret;
@@ -459,12 +466,10 @@ spufs_regs_open(struct inode *inode, struct file *file)
 }
 
 static ssize_t
-__spufs_regs_read(struct spu_context *ctx, char __user *buffer,
-			size_t size, loff_t *pos)
+spufs_regs_dump(struct spu_context *ctx, struct coredump_params *cprm)
 {
-	struct spu_lscsa *lscsa = ctx->csa.lscsa;
-	return simple_read_from_buffer(buffer, size, pos,
-				      lscsa->gprs, sizeof lscsa->gprs);
+	return spufs_dump_emit(cprm, ctx->csa.lscsa->gprs,
+			       sizeof(ctx->csa.lscsa->gprs));
 }
 
 static ssize_t
@@ -482,7 +487,8 @@ spufs_regs_read(struct file *file, char __user *buffer,
 	ret = spu_acquire_saved(ctx);
 	if (ret)
 		return ret;
-	ret = __spufs_regs_read(ctx, buffer, size, pos);
+	ret = simple_read_from_buffer(buffer, size, pos, ctx->csa.lscsa->gprs,
+				      sizeof(ctx->csa.lscsa->gprs));
 	spu_release_saved(ctx);
 	return ret;
 }
@@ -517,12 +523,10 @@ static const struct file_operations spufs_regs_fops = {
 };
 
 static ssize_t
-__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer,
-			size_t size, loff_t * pos)
+spufs_fpcr_dump(struct spu_context *ctx, struct coredump_params *cprm)
 {
-	struct spu_lscsa *lscsa = ctx->csa.lscsa;
-	return simple_read_from_buffer(buffer, size, pos,
-				      &lscsa->fpcr, sizeof(lscsa->fpcr));
+	return spufs_dump_emit(cprm, &ctx->csa.lscsa->fpcr,
+			       sizeof(ctx->csa.lscsa->fpcr));
 }
 
 static ssize_t
@@ -535,7 +539,8 @@ spufs_fpcr_read(struct file *file, char __user * buffer,
 	ret = spu_acquire_saved(ctx);
 	if (ret)
 		return ret;
-	ret = __spufs_fpcr_read(ctx, buffer, size, pos);
+	ret = simple_read_from_buffer(buffer, size, pos, &ctx->csa.lscsa->fpcr,
+				      sizeof(ctx->csa.lscsa->fpcr));
 	spu_release_saved(ctx);
 	return ret;
 }
@@ -967,28 +972,26 @@ spufs_signal1_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
-			size_t len, loff_t *pos)
+static ssize_t spufs_signal1_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
 {
-	int ret = 0;
-	u32 data;
+	if (!ctx->csa.spu_chnlcnt_RW[3])
+		return 0;
+	return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[3],
+			       sizeof(ctx->csa.spu_chnldata_RW[3]));
+}
 
-	if (len < 4)
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
+			size_t len)
+{
+	if (len < sizeof(ctx->csa.spu_chnldata_RW[3]))
 		return -EINVAL;
-
-	if (ctx->csa.spu_chnlcnt_RW[3]) {
-		data = ctx->csa.spu_chnldata_RW[3];
-		ret = 4;
-	}
-
-	if (!ret)
-		goto out;
-
-	if (copy_to_user(buf, &data, 4))
+	if (!ctx->csa.spu_chnlcnt_RW[3])
+		return 0;
+	if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[3],
+			 sizeof(ctx->csa.spu_chnldata_RW[3])))
 		return -EFAULT;
-
-out:
-	return ret;
+	return sizeof(ctx->csa.spu_chnldata_RW[3]);
 }
 
 static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
@@ -1000,7 +1003,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
 	ret = spu_acquire_saved(ctx);
 	if (ret)
 		return ret;
-	ret = __spufs_signal1_read(ctx, buf, len, pos);
+	ret = __spufs_signal1_read(ctx, buf, len);
 	spu_release_saved(ctx);
 
 	return ret;
@@ -1104,28 +1107,26 @@ spufs_signal2_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
-			size_t len, loff_t *pos)
+static ssize_t spufs_signal2_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
 {
-	int ret = 0;
-	u32 data;
+	if (!ctx->csa.spu_chnlcnt_RW[4])
+		return 0;
+	return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[4],
+			       sizeof(ctx->csa.spu_chnldata_RW[4]));
+}
 
-	if (len < 4)
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
+			size_t len)
+{
+	if (len < sizeof(ctx->csa.spu_chnldata_RW[4]))
 		return -EINVAL;
-
-	if (ctx->csa.spu_chnlcnt_RW[4]) {
-		data =  ctx->csa.spu_chnldata_RW[4];
-		ret = 4;
-	}
-
-	if (!ret)
-		goto out;
-
-	if (copy_to_user(buf, &data, 4))
+	if (!ctx->csa.spu_chnlcnt_RW[4])
+		return 0;
+	if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[4],
+			 sizeof(ctx->csa.spu_chnldata_RW[4])))
 		return -EFAULT;
-
-out:
-	return ret;
+	return sizeof(ctx->csa.spu_chnldata_RW[4]);
 }
 
 static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
@@ -1137,7 +1138,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
 	ret = spu_acquire_saved(ctx);
 	if (ret)
 		return ret;
-	ret = __spufs_signal2_read(ctx, buf, len, pos);
+	ret = __spufs_signal2_read(ctx, buf, len);
 	spu_release_saved(ctx);
 
 	return ret;
@@ -1961,18 +1962,13 @@ static const struct file_operations spufs_caps_fops = {
 	.release	= single_release,
 };
 
-static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
-			char __user *buf, size_t len, loff_t *pos)
+static ssize_t spufs_mbox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
 {
-	u32 data;
-
-	/* EOF if there's no entry in the mbox */
 	if (!(ctx->csa.prob.mb_stat_R & 0x0000ff))
 		return 0;
-
-	data = ctx->csa.prob.pu_mb_R;
-
-	return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+	return spufs_dump_emit(cprm, &ctx->csa.prob.pu_mb_R,
+			       sizeof(ctx->csa.prob.pu_mb_R));
 }
 
 static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
@@ -1988,7 +1984,12 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
 	if (ret)
 		return ret;
 	spin_lock(&ctx->csa.register_lock);
-	ret = __spufs_mbox_info_read(ctx, buf, len, pos);
+	/* EOF if there's no entry in the mbox */
+	if (ctx->csa.prob.mb_stat_R & 0x0000ff) {
+		ret = simple_read_from_buffer(buf, len, pos,
+				&ctx->csa.prob.pu_mb_R,
+				sizeof(ctx->csa.prob.pu_mb_R));
+	}
 	spin_unlock(&ctx->csa.register_lock);
 	spu_release_saved(ctx);
 
@@ -2001,18 +2002,13 @@ static const struct file_operations spufs_mbox_info_fops = {
 	.llseek  = generic_file_llseek,
 };
 
-static ssize_t __spufs_ibox_info_read(struct spu_context *ctx,
-				char __user *buf, size_t len, loff_t *pos)
+static ssize_t spufs_ibox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
 {
-	u32 data;
-
-	/* EOF if there's no entry in the ibox */
 	if (!(ctx->csa.prob.mb_stat_R & 0xff0000))
 		return 0;
-
-	data = ctx->csa.priv2.puint_mb_R;
-
-	return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+	return spufs_dump_emit(cprm, &ctx->csa.priv2.puint_mb_R,
+			       sizeof(ctx->csa.priv2.puint_mb_R));
 }
 
 static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
@@ -2028,7 +2024,12 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
 	if (ret)
 		return ret;
 	spin_lock(&ctx->csa.register_lock);
-	ret = __spufs_ibox_info_read(ctx, buf, len, pos);
+	/* EOF if there's no entry in the ibox */
+	if (ctx->csa.prob.mb_stat_R & 0xff0000) {
+		ret = simple_read_from_buffer(buf, len, pos,
+				&ctx->csa.priv2.puint_mb_R,
+				sizeof(ctx->csa.priv2.puint_mb_R));
+	}
 	spin_unlock(&ctx->csa.register_lock);
 	spu_release_saved(ctx);
 
@@ -2041,21 +2042,16 @@ static const struct file_operations spufs_ibox_info_fops = {
 	.llseek  = generic_file_llseek,
 };
 
-static ssize_t __spufs_wbox_info_read(struct spu_context *ctx,
-			char __user *buf, size_t len, loff_t *pos)
+static size_t spufs_wbox_info_cnt(struct spu_context *ctx)
 {
-	int i, cnt;
-	u32 data[4];
-	u32 wbox_stat;
-
-	wbox_stat = ctx->csa.prob.mb_stat_R;
-	cnt = 4 - ((wbox_stat & 0x00ff00) >> 8);
-	for (i = 0; i < cnt; i++) {
-		data[i] = ctx->csa.spu_mailbox_data[i];
-	}
+	return (4 - ((ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8)) * sizeof(u32);
+}
 
-	return simple_read_from_buffer(buf, len, pos, &data,
-				cnt * sizeof(u32));
+static ssize_t spufs_wbox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	return spufs_dump_emit(cprm, &ctx->csa.spu_mailbox_data,
+			spufs_wbox_info_cnt(ctx));
 }
 
 static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
@@ -2071,7 +2067,8 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
 	if (ret)
 		return ret;
 	spin_lock(&ctx->csa.register_lock);
-	ret = __spufs_wbox_info_read(ctx, buf, len, pos);
+	ret = simple_read_from_buffer(buf, len, pos, &ctx->csa.spu_mailbox_data,
+				      spufs_wbox_info_cnt(ctx));
 	spin_unlock(&ctx->csa.register_lock);
 	spu_release_saved(ctx);
 
@@ -2084,36 +2081,42 @@ static const struct file_operations spufs_wbox_info_fops = {
 	.llseek  = generic_file_llseek,
 };
 
-static ssize_t __spufs_dma_info_read(struct spu_context *ctx,
-			char __user *buf, size_t len, loff_t *pos)
+static void __spufs_dma_info_read(struct spu_context *ctx,
+		struct spu_dma_info *info)
 {
-	struct spu_dma_info info;
-	struct mfc_cq_sr *qp, *spuqp;
 	int i;
 
-	info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
-	info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
-	info.dma_info_status = ctx->csa.spu_chnldata_RW[24];
-	info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
-	info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
+	info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
+	info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
+	info->dma_info_status = ctx->csa.spu_chnldata_RW[24];
+	info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
+	info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
+
 	for (i = 0; i < 16; i++) {
-		qp = &info.dma_info_command_data[i];
-		spuqp = &ctx->csa.priv2.spuq[i];
+		struct mfc_cq_sr *qp = &info->dma_info_command_data[i];
+		struct mfc_cq_sr *spuqp = &ctx->csa.priv2.spuq[i];
 
 		qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW;
 		qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW;
 		qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW;
 		qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW;
 	}
+}
+
+static ssize_t spufs_dma_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	struct spu_dma_info info;
 
-	return simple_read_from_buffer(buf, len, pos, &info,
-				sizeof info);
+	__spufs_dma_info_read(ctx, &info);
+	return spufs_dump_emit(cprm, &info, sizeof(info));
 }
 
 static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
 			      size_t len, loff_t *pos)
 {
 	struct spu_context *ctx = file->private_data;
+	struct spu_dma_info info;
 	int ret;
 
 	if (!access_ok(buf, len))
@@ -2123,7 +2126,8 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
 	if (ret)
 		return ret;
 	spin_lock(&ctx->csa.register_lock);
-	ret = __spufs_dma_info_read(ctx, buf, len, pos);
+	__spufs_dma_info_read(ctx, &info);
+	ret = simple_read_from_buffer(buf, len, pos, &info, sizeof(info));
 	spin_unlock(&ctx->csa.register_lock);
 	spu_release_saved(ctx);
 
@@ -2136,48 +2140,53 @@ static const struct file_operations spufs_dma_info_fops = {
 	.llseek = no_llseek,
 };
 
-static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx,
-			char __user *buf, size_t len, loff_t *pos)
+static void __spufs_proxydma_info_read(struct spu_context *ctx,
+	struct spu_proxydma_info *info)
 {
-	struct spu_proxydma_info info;
-	struct mfc_cq_sr *qp, *puqp;
-	int ret = sizeof info;
 	int i;
 
-	if (len < ret)
-		return -EINVAL;
-
-	if (!access_ok(buf, len))
-		return -EFAULT;
+	info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
+	info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
+	info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
 
-	info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
-	info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
-	info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
 	for (i = 0; i < 8; i++) {
-		qp = &info.proxydma_info_command_data[i];
-		puqp = &ctx->csa.priv2.puq[i];
+		struct mfc_cq_sr *qp = &info->proxydma_info_command_data[i];
+		struct mfc_cq_sr *puqp = &ctx->csa.priv2.puq[i];
 
 		qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW;
 		qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW;
 		qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW;
 		qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW;
 	}
+}
+
+static ssize_t spufs_proxydma_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	struct spu_proxydma_info info;
 
-	return simple_read_from_buffer(buf, len, pos, &info,
-				sizeof info);
+	__spufs_proxydma_info_read(ctx, &info);
+	return spufs_dump_emit(cprm, &info, sizeof(info));
 }
 
 static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
 				   size_t len, loff_t *pos)
 {
 	struct spu_context *ctx = file->private_data;
+	struct spu_proxydma_info info;
 	int ret;
 
+	if (len < sizeof(info))
+		return -EINVAL;
+	if (!access_ok(buf, len))
+		return -EFAULT;
+
 	ret = spu_acquire_saved(ctx);
 	if (ret)
 		return ret;
 	spin_lock(&ctx->csa.register_lock);
-	ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
+	__spufs_proxydma_info_read(ctx, &info);
+	ret = simple_read_from_buffer(buf, len, pos, &info, sizeof(info));
 	spin_unlock(&ctx->csa.register_lock);
 	spu_release_saved(ctx);
 
@@ -2625,23 +2634,23 @@ const struct spufs_tree_descr spufs_dir_debug_contents[] = {
 };
 
 const struct spufs_coredump_reader spufs_coredump_read[] = {
-	{ "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])},
-	{ "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) },
+	{ "regs", spufs_regs_dump, NULL, sizeof(struct spu_reg128[128])},
+	{ "fpcr", spufs_fpcr_dump, NULL, sizeof(struct spu_reg128) },
 	{ "lslr", NULL, spufs_lslr_get, 19 },
 	{ "decr", NULL, spufs_decr_get, 19 },
 	{ "decr_status", NULL, spufs_decr_status_get, 19 },
-	{ "mem", __spufs_mem_read, NULL, LS_SIZE, },
-	{ "signal1", __spufs_signal1_read, NULL, sizeof(u32) },
+	{ "mem", spufs_mem_dump, NULL, LS_SIZE, },
+	{ "signal1", spufs_signal1_dump, NULL, sizeof(u32) },
 	{ "signal1_type", NULL, spufs_signal1_type_get, 19 },
-	{ "signal2", __spufs_signal2_read, NULL, sizeof(u32) },
+	{ "signal2", spufs_signal2_dump, NULL, sizeof(u32) },
 	{ "signal2_type", NULL, spufs_signal2_type_get, 19 },
 	{ "event_mask", NULL, spufs_event_mask_get, 19 },
 	{ "event_status", NULL, spufs_event_status_get, 19 },
-	{ "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) },
-	{ "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) },
-	{ "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)},
-	{ "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)},
-	{ "proxydma_info", __spufs_proxydma_info_read,
+	{ "mbox_info", spufs_mbox_info_dump, NULL, sizeof(u32) },
+	{ "ibox_info", spufs_ibox_info_dump, NULL, sizeof(u32) },
+	{ "wbox_info", spufs_wbox_info_dump, NULL, 4 * sizeof(u32)},
+	{ "dma_info", spufs_dma_info_dump, NULL, sizeof(struct spu_dma_info)},
+	{ "proxydma_info", spufs_proxydma_info_dump,
 			   NULL, sizeof(struct spu_proxydma_info)},
 	{ "object-id", NULL, spufs_object_id_get, 19 },
 	{ "npc", NULL, spufs_npc_get, 19 },
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 413c89afe112..1ba4d884febf 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -337,8 +337,7 @@ void spufs_dma_callback(struct spu *spu, int type);
 extern struct spu_coredump_calls spufs_coredump_calls;
 struct spufs_coredump_reader {
 	char *name;
-	ssize_t (*read)(struct spu_context *ctx,
-			char __user *buffer, size_t size, loff_t *pos);
+	ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm);
 	u64 (*get)(struct spu_context *ctx);
 	size_t size;
 };
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 2/8] signal: clean up __copy_siginfo_to_user32
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
  2020-04-14  7:01 ` [PATCH 1/8] powerpc/spufs: simplify spufs core dumping Christoph Hellwig
@ 2020-04-14  7:01 ` Christoph Hellwig
  2020-04-17 21:08   ` Eric W. Biederman
  2020-04-14  7:01 ` [PATCH 3/8] signal: replace __copy_siginfo_to_user32 with to_compat_siginfo Christoph Hellwig
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

Instead of an architecture specific calling convention in common code
just pass a flags argument with architecture specific values.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/ia32/ia32_signal.c   |  2 +-
 arch/x86/include/asm/compat.h |  4 ----
 arch/x86/include/asm/signal.h |  3 +++
 arch/x86/kernel/signal.c      |  3 ++-
 include/linux/compat.h        |  2 ++
 kernel/signal.c               | 21 ++++++++++++---------
 6 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index f9d8804144d0..2bf188942d5c 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -350,7 +350,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
 	unsafe_put_user(*(__u64 *)set, (__u64 *)&frame->uc.uc_sigmask, Efault);
 	user_access_end();
 
-	if (__copy_siginfo_to_user32(&frame->info, &ksig->info, false))
+	if (__copy_siginfo_to_user32(&frame->info, &ksig->info, SA_IA32_ABI))
 		return -EFAULT;
 
 	/* Set up registers for signal handler */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 52e9f3480f69..a787c9a82030 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -213,8 +213,4 @@ static inline bool in_compat_syscall(void)
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
 #endif
 
-struct compat_siginfo;
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
-		const kernel_siginfo_t *from, bool x32_ABI);
-
 #endif /* _ASM_X86_COMPAT_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 33d3c88a7225..b3f7a14da428 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -28,6 +28,9 @@ typedef struct {
 #define SA_IA32_ABI	0x02000000u
 #define SA_X32_ABI	0x01000000u
 
+#define compat_siginfo_flags() \
+	(in_x32_syscall() ? SA_X32_ABI : SA_IA32_ABI)
+
 #ifndef CONFIG_COMPAT
 typedef sigset_t compat_sigset_t;
 #endif
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 83b74fb38c8f..bbd451631790 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -543,7 +543,8 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
 	user_access_end();
 
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-		if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
+		if (__copy_siginfo_to_user32(&frame->info, &ksig->info,
+				SA_X32_ABI))
 			return -EFAULT;
 	}
 
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0480ba4db592..14eec6116110 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -404,6 +404,8 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size);
 int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from);
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from);
+int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
+		const kernel_siginfo_t *from, unsigned int flags);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index e58a6c619824..092fee008242 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3235,15 +3235,8 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
 }
 
 #ifdef CONFIG_COMPAT
-int copy_siginfo_to_user32(struct compat_siginfo __user *to,
-			   const struct kernel_siginfo *from)
-#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
-{
-	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
-}
 int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
-			     const struct kernel_siginfo *from, bool x32_ABI)
-#endif
+		const struct kernel_siginfo *from, unsigned int flags)
 {
 	struct compat_siginfo new;
 	memset(&new, 0, sizeof(new));
@@ -3298,7 +3291,7 @@ int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
 		new.si_uid    = from->si_uid;
 		new.si_status = from->si_status;
 #ifdef CONFIG_X86_X32_ABI
-		if (x32_ABI) {
+		if (flags & SA_X32_ABI) {
 			new._sifields._sigchld_x32._utime = from->si_utime;
 			new._sifields._sigchld_x32._stime = from->si_stime;
 		} else
@@ -3326,6 +3319,16 @@ int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
 	return 0;
 }
 
+#ifndef compat_siginfo_flags
+#define compat_siginfo_flags()		0
+#endif
+
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+			   const struct kernel_siginfo *from)
+{
+	return __copy_siginfo_to_user32(to, from, compat_siginfo_flags());
+}
+
 static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
 					 const struct compat_siginfo *from)
 {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 3/8] signal: replace __copy_siginfo_to_user32 with to_compat_siginfo
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
  2020-04-14  7:01 ` [PATCH 1/8] powerpc/spufs: simplify spufs core dumping Christoph Hellwig
  2020-04-14  7:01 ` [PATCH 2/8] signal: clean up __copy_siginfo_to_user32 Christoph Hellwig
@ 2020-04-14  7:01 ` Christoph Hellwig
  2020-04-14 14:00   ` Arnd Bergmann
  2020-04-14  7:01 ` [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer Christoph Hellwig
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

Move copying the siginfo to userspace into the callers, so that the
compat_siginfo conversion can be reused by the ELF coredump code without
set_fs magic.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/ia32/ia32_signal.c |  4 +-
 arch/x86/kernel/signal.c    |  5 ++-
 include/linux/compat.h      |  4 +-
 kernel/signal.c             | 89 ++++++++++++++++++-------------------
 4 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 2bf188942d5c..0fbaed2562bc 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -301,6 +301,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
 			compat_sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe_ia32 __user *frame;
+	struct compat_siginfo new;
 	void __user *restorer;
 	void __user *fp = NULL;
 
@@ -350,7 +351,8 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
 	unsafe_put_user(*(__u64 *)set, (__u64 *)&frame->uc.uc_sigmask, Efault);
 	user_access_end();
 
-	if (__copy_siginfo_to_user32(&frame->info, &ksig->info, SA_IA32_ABI))
+	to_compat_siginfo(&new, &ksig->info, SA_IA32_ABI);
+	if (copy_to_user(&frame->info, &new, sizeof(frame->info)))
 		return -EFAULT;
 
 	/* Set up registers for signal handler */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index bbd451631790..6ff1265f071b 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -517,6 +517,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
 {
 #ifdef CONFIG_X86_X32_ABI
 	struct rt_sigframe_x32 __user *frame;
+	struct compat_siginfo new;
 	unsigned long uc_flags;
 	void __user *restorer;
 	void __user *fp = NULL;
@@ -543,8 +544,8 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
 	user_access_end();
 
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-		if (__copy_siginfo_to_user32(&frame->info, &ksig->info,
-				SA_X32_ABI))
+		to_compat_siginfo(&new, &ksig->info, SA_X32_ABI);
+		if (copy_to_user(&frame->info, &new, sizeof(frame->info)))
 			return -EFAULT;
 	}
 
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 14eec6116110..218ebba1e454 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -404,8 +404,8 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size);
 int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from);
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from);
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
-		const kernel_siginfo_t *from, unsigned int flags);
+void to_compat_siginfo(struct compat_siginfo *to,
+		const struct kernel_siginfo *from, unsigned int flags);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 092fee008242..0f3e7fded3a5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3235,88 +3235,82 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
 }
 
 #ifdef CONFIG_COMPAT
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
+void to_compat_siginfo(struct compat_siginfo *to,
 		const struct kernel_siginfo *from, unsigned int flags)
 {
-	struct compat_siginfo new;
-	memset(&new, 0, sizeof(new));
+	memset(to, 0, sizeof(*to));
+	to->si_signo = from->si_signo;
+	to->si_errno = from->si_errno;
+	to->si_code  = from->si_code;
 
-	new.si_signo = from->si_signo;
-	new.si_errno = from->si_errno;
-	new.si_code  = from->si_code;
-	switch(siginfo_layout(from->si_signo, from->si_code)) {
+	switch (siginfo_layout(from->si_signo, from->si_code)) {
 	case SIL_KILL:
-		new.si_pid = from->si_pid;
-		new.si_uid = from->si_uid;
+		to->si_pid = from->si_pid;
+		to->si_uid = from->si_uid;
 		break;
 	case SIL_TIMER:
-		new.si_tid     = from->si_tid;
-		new.si_overrun = from->si_overrun;
-		new.si_int     = from->si_int;
+		to->si_tid     = from->si_tid;
+		to->si_overrun = from->si_overrun;
+		to->si_int     = from->si_int;
 		break;
 	case SIL_POLL:
-		new.si_band = from->si_band;
-		new.si_fd   = from->si_fd;
+		to->si_band = from->si_band;
+		to->si_fd   = from->si_fd;
 		break;
 	case SIL_FAULT:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
 		break;
 	case SIL_FAULT_MCEERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_addr_lsb = from->si_addr_lsb;
+		to->si_addr_lsb = from->si_addr_lsb;
 		break;
 	case SIL_FAULT_BNDERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_lower = ptr_to_compat(from->si_lower);
-		new.si_upper = ptr_to_compat(from->si_upper);
+		to->si_lower = ptr_to_compat(from->si_lower);
+		to->si_upper = ptr_to_compat(from->si_upper);
 		break;
 	case SIL_FAULT_PKUERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_pkey = from->si_pkey;
+		to->si_pkey = from->si_pkey;
 		break;
 	case SIL_CHLD:
-		new.si_pid    = from->si_pid;
-		new.si_uid    = from->si_uid;
-		new.si_status = from->si_status;
+		to->si_pid    = from->si_pid;
+		to->si_uid    = from->si_uid;
+		to->si_status = from->si_status;
 #ifdef CONFIG_X86_X32_ABI
 		if (flags & SA_X32_ABI) {
-			new._sifields._sigchld_x32._utime = from->si_utime;
-			new._sifields._sigchld_x32._stime = from->si_stime;
+			to->_sifields._sigchld_x32._utime = from->si_utime;
+			to->_sifields._sigchld_x32._stime = from->si_stime;
 		} else
 #endif
 		{
-			new.si_utime = from->si_utime;
-			new.si_stime = from->si_stime;
+			to->si_utime = from->si_utime;
+			to->si_stime = from->si_stime;
 		}
 		break;
 	case SIL_RT:
-		new.si_pid = from->si_pid;
-		new.si_uid = from->si_uid;
-		new.si_int = from->si_int;
+		to->si_pid = from->si_pid;
+		to->si_uid = from->si_uid;
+		to->si_int = from->si_int;
 		break;
 	case SIL_SYS:
-		new.si_call_addr = ptr_to_compat(from->si_call_addr);
-		new.si_syscall   = from->si_syscall;
-		new.si_arch      = from->si_arch;
+		to->si_call_addr = ptr_to_compat(from->si_call_addr);
+		to->si_syscall   = from->si_syscall;
+		to->si_arch      = from->si_arch;
 		break;
 	}
-
-	if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
-		return -EFAULT;
-
-	return 0;
 }
 
 #ifndef compat_siginfo_flags
@@ -3326,7 +3320,12 @@ int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
 int copy_siginfo_to_user32(struct compat_siginfo __user *to,
 			   const struct kernel_siginfo *from)
 {
-	return __copy_siginfo_to_user32(to, from, compat_siginfo_flags());
+	struct compat_siginfo new;
+
+	to_compat_siginfo(&new, from, compat_siginfo_flags());
+	if (copy_to_user(to, &new, sizeof(*to)))
+		return -EFAULT;
+	return 0;
 }
 
 static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2020-04-14  7:01 ` [PATCH 3/8] signal: replace __copy_siginfo_to_user32 with to_compat_siginfo Christoph Hellwig
@ 2020-04-14  7:01 ` Christoph Hellwig
  2020-04-14 13:15   ` Arnd Bergmann
  2020-04-15  3:01   ` Michael Ellerman
  2020-04-14  7:01 ` [PATCH 5/8] binfmt_elf: remove the set_fs(KERNEL_DS) in elf_core_dump Christoph Hellwig
                   ` (4 subsequent siblings)
  8 siblings, 2 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

Instead of messing with the address limit just open code the trivial
memcpy + memset logic for the native version, and a call to
to_compat_siginfo for the compat version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/binfmt_elf.c        | 9 +++++----
 fs/compat_binfmt_elf.c | 6 +++++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 13f25e241ac4..607c5a5f855e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1553,15 +1553,16 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
 }
 
+#ifndef fill_siginfo_note
 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
 		const kernel_siginfo_t *siginfo)
 {
-	mm_segment_t old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
-	set_fs(old_fs);
+	memcpy(csigdata, siginfo, sizeof(struct kernel_siginfo));
+	memset((char *)csigdata + sizeof(struct kernel_siginfo), 0,
+		SI_EXPANSION_SIZE);
 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
 }
+#endif
 
 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
 /*
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index aaad4ca1217e..ab84e095618b 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -39,7 +39,11 @@
  */
 #define user_long_t		compat_long_t
 #define user_siginfo_t		compat_siginfo_t
-#define copy_siginfo_to_user	copy_siginfo_to_user32
+#define fill_siginfo_note(note, csigdata, siginfo)		\
+do {									\
+	to_compat_siginfo(csigdata, siginfo, compat_siginfo_flags());	\
+	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); \
+} while (0)
 
 /*
  * The machine-dependent core note format types are defined in elfcore-compat.h,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 5/8] binfmt_elf: remove the set_fs(KERNEL_DS) in elf_core_dump
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2020-04-14  7:01 ` [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer Christoph Hellwig
@ 2020-04-14  7:01 ` Christoph Hellwig
  2020-04-14  7:01 ` [PATCH 6/8] binfmt_elf_fdpic: remove the set_fs(KERNEL_DS) in elf_fdpic_core_dump Christoph Hellwig
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

There is no logic in elf_core_dump itself, or in the various arch helpers
called from it which use uaccess routines on kernel pointers, except for
the actual file writes that nicely encapsulated in __kernel_write used by
dump_emit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/binfmt_elf.c | 40 +++++++++++++---------------------------
 1 file changed, 13 insertions(+), 27 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 607c5a5f855e..a53a6faa34b1 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1355,7 +1355,6 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
 		u32 __user *header = (u32 __user *) vma->vm_start;
 		u32 word;
-		mm_segment_t fs = get_fs();
 		/*
 		 * Doing it this way gets the constant folded by GCC.
 		 */
@@ -1368,14 +1367,8 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 		magic.elfmag[EI_MAG1] = ELFMAG1;
 		magic.elfmag[EI_MAG2] = ELFMAG2;
 		magic.elfmag[EI_MAG3] = ELFMAG3;
-		/*
-		 * Switch to the user "segment" for get_user(),
-		 * then put back what elf_core_dump() had in place.
-		 */
-		set_fs(USER_DS);
 		if (unlikely(get_user(word, header)))
 			word = 0;
-		set_fs(fs);
 		if (word == magic.cmp)
 			return PAGE_SIZE;
 	}
@@ -2187,7 +2180,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
 static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
-	mm_segment_t fs;
 	int segs, i;
 	size_t vma_data_size = 0;
 	struct vm_area_struct *vma, *gate_vma;
@@ -2240,9 +2232,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	has_dumped = 1;
 
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-
 	offset += sizeof(elf);				/* Elf header */
 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
 
@@ -2254,7 +2243,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
 		if (!phdr4note)
-			goto end_coredump;
+			goto cleanup;
 
 		fill_elf_note_phdr(phdr4note, sz, offset);
 		offset += sz;
@@ -2269,7 +2258,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
 			      GFP_KERNEL);
 	if (!vma_filesz)
-		goto end_coredump;
+		goto cleanup;
 
 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
@@ -2287,17 +2276,17 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (e_phnum == PN_XNUM) {
 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
 		if (!shdr4extnum)
-			goto end_coredump;
+			goto cleanup;
 		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
 	}
 
 	offset = dataoff;
 
 	if (!dump_emit(cprm, &elf, sizeof(elf)))
-		goto end_coredump;
+		goto cleanup;
 
 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
-		goto end_coredump;
+		goto cleanup;
 
 	/* Write program headers for segments dump */
 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
@@ -2319,22 +2308,22 @@ static int elf_core_dump(struct coredump_params *cprm)
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
-			goto end_coredump;
+			goto cleanup;
 	}
 
 	if (!elf_core_write_extra_phdrs(cprm, offset))
-		goto end_coredump;
+		goto cleanup;
 
  	/* write out the notes section */
 	if (!write_note_info(&info, cprm))
-		goto end_coredump;
+		goto cleanup;
 
 	if (elf_coredump_extra_notes_write(cprm))
-		goto end_coredump;
+		goto cleanup;
 
 	/* Align to page */
 	if (!dump_skip(cprm, dataoff - cprm->pos))
-		goto end_coredump;
+		goto cleanup;
 
 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
@@ -2356,22 +2345,19 @@ static int elf_core_dump(struct coredump_params *cprm)
 			} else
 				stop = !dump_skip(cprm, PAGE_SIZE);
 			if (stop)
-				goto end_coredump;
+				goto cleanup;
 		}
 	}
 	dump_truncate(cprm);
 
 	if (!elf_core_write_extra_data(cprm))
-		goto end_coredump;
+		goto cleanup;
 
 	if (e_phnum == PN_XNUM) {
 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
-			goto end_coredump;
+			goto cleanup;
 	}
 
-end_coredump:
-	set_fs(fs);
-
 cleanup:
 	free_note_info(&info);
 	kfree(shdr4extnum);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 6/8] binfmt_elf_fdpic: remove the set_fs(KERNEL_DS) in elf_fdpic_core_dump
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2020-04-14  7:01 ` [PATCH 5/8] binfmt_elf: remove the set_fs(KERNEL_DS) in elf_core_dump Christoph Hellwig
@ 2020-04-14  7:01 ` Christoph Hellwig
  2020-04-14  7:01 ` [PATCH 7/8] exec: simplify the copy_strings_kernel calling convention Christoph Hellwig
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

There is no logic in elf_fdpic_core_dump itself that uses uaccess
routines on kernel pointers, the file writes are nicely encapsulated in
dump_emit which does its own set_fs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/binfmt_elf_fdpic.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 240f66663543..c62c17a5c34a 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1549,7 +1549,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 {
 #define	NUM_NOTES	6
 	int has_dumped = 0;
-	mm_segment_t fs;
 	int segs;
 	int i;
 	struct vm_area_struct *vma;
@@ -1678,9 +1677,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 			  "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu);
 #endif
 
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-
 	offset += sizeof(*elf);				/* Elf header */
 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
 
@@ -1695,7 +1691,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 
 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
 		if (!phdr4note)
-			goto end_coredump;
+			goto cleanup;
 
 		fill_elf_note_phdr(phdr4note, sz, offset);
 		offset += sz;
@@ -1711,17 +1707,17 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	if (e_phnum == PN_XNUM) {
 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
 		if (!shdr4extnum)
-			goto end_coredump;
+			goto cleanup;
 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
 	}
 
 	offset = dataoff;
 
 	if (!dump_emit(cprm, elf, sizeof(*elf)))
-		goto end_coredump;
+		goto cleanup;
 
 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
-		goto end_coredump;
+		goto cleanup;
 
 	/* write program headers for segments dump */
 	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
@@ -1745,16 +1741,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
-			goto end_coredump;
+			goto cleanup;
 	}
 
 	if (!elf_core_write_extra_phdrs(cprm, offset))
-		goto end_coredump;
+		goto cleanup;
 
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
 		if (!writenote(notes + i, cprm))
-			goto end_coredump;
+			goto cleanup;
 
 	/* write out the thread status notes section */
 	list_for_each(t, &thread_list) {
@@ -1763,21 +1759,21 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 
 		for (i = 0; i < tmp->num_notes; i++)
 			if (!writenote(&tmp->notes[i], cprm))
-				goto end_coredump;
+				goto cleanup;
 	}
 
 	if (!dump_skip(cprm, dataoff - cprm->pos))
-		goto end_coredump;
+		goto cleanup;
 
 	if (!elf_fdpic_dump_segments(cprm))
-		goto end_coredump;
+		goto cleanup;
 
 	if (!elf_core_write_extra_data(cprm))
-		goto end_coredump;
+		goto cleanup;
 
 	if (e_phnum == PN_XNUM) {
 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
-			goto end_coredump;
+			goto cleanup;
 	}
 
 	if (cprm->file->f_pos != offset) {
@@ -1787,9 +1783,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 		       cprm->file->f_pos, offset);
 	}
 
-end_coredump:
-	set_fs(fs);
-
 cleanup:
 	while (!list_empty(&thread_list)) {
 		struct list_head *tmp = thread_list.next;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 7/8] exec: simplify the copy_strings_kernel calling convention
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
                   ` (5 preceding siblings ...)
  2020-04-14  7:01 ` [PATCH 6/8] binfmt_elf_fdpic: remove the set_fs(KERNEL_DS) in elf_fdpic_core_dump Christoph Hellwig
@ 2020-04-14  7:01 ` Christoph Hellwig
  2020-04-14  7:01 ` [PATCH 8/8] exec: open code copy_string_kernel Christoph Hellwig
  2020-04-17 22:41 ` remove set_fs calls from the exec and coredump code v2 Eric W. Biederman
  8 siblings, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

copy_strings_kernel is always used with a single argument,
adjust the calling convention to that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/binfmt_em86.c        |  6 +++---
 fs/binfmt_misc.c        |  4 ++--
 fs/binfmt_script.c      |  6 +++---
 fs/exec.c               | 13 ++++++-------
 include/linux/binfmts.h |  3 +--
 5 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 466497860c62..f33fa668c91f 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -68,15 +68,15 @@ static int load_em86(struct linux_binprm *bprm)
 	 * user environment and arguments are stored.
 	 */
 	remove_arg_zero(bprm);
-	retval = copy_strings_kernel(1, &bprm->filename, bprm);
+	retval = copy_string_kernel(bprm->filename, bprm);
 	if (retval < 0) return retval; 
 	bprm->argc++;
 	if (i_arg) {
-		retval = copy_strings_kernel(1, &i_arg, bprm);
+		retval = copy_string_kernel(i_arg, bprm);
 		if (retval < 0) return retval; 
 		bprm->argc++;
 	}
-	retval = copy_strings_kernel(1, &i_name, bprm);
+	retval = copy_string_kernel(i_name, bprm);
 	if (retval < 0)	return retval;
 	bprm->argc++;
 
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index cdb45829354d..b15257d8ff5e 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -190,13 +190,13 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		bprm->file = NULL;
 	}
 	/* make argv[1] be the path to the binary */
-	retval = copy_strings_kernel(1, &bprm->interp, bprm);
+	retval = copy_string_kernel(bprm->interp, bprm);
 	if (retval < 0)
 		goto error;
 	bprm->argc++;
 
 	/* add the interp as argv[0] */
-	retval = copy_strings_kernel(1, &fmt->interpreter, bprm);
+	retval = copy_string_kernel(fmt->interpreter, bprm);
 	if (retval < 0)
 		goto error;
 	bprm->argc++;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index e9e6a6f4a35f..c4fb7f52a46e 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -117,17 +117,17 @@ static int load_script(struct linux_binprm *bprm)
 	retval = remove_arg_zero(bprm);
 	if (retval)
 		return retval;
-	retval = copy_strings_kernel(1, &bprm->interp, bprm);
+	retval = copy_string_kernel(bprm->interp, bprm);
 	if (retval < 0)
 		return retval;
 	bprm->argc++;
 	if (i_arg) {
-		retval = copy_strings_kernel(1, &i_arg, bprm);
+		retval = copy_string_kernel(i_arg, bprm);
 		if (retval < 0)
 			return retval;
 		bprm->argc++;
 	}
-	retval = copy_strings_kernel(1, &i_name, bprm);
+	retval = copy_string_kernel(i_name, bprm);
 	if (retval)
 		return retval;
 	bprm->argc++;
diff --git a/fs/exec.c b/fs/exec.c
index 06b4c550af5d..b2a77d5acede 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -588,24 +588,23 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
 }
 
 /*
- * Like copy_strings, but get argv and its values from kernel memory.
+ * Copy and argument/environment string from the kernel to the processes stack.
  */
-int copy_strings_kernel(int argc, const char *const *__argv,
-			struct linux_binprm *bprm)
+int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
 {
 	int r;
 	mm_segment_t oldfs = get_fs();
 	struct user_arg_ptr argv = {
-		.ptr.native = (const char __user *const  __user *)__argv,
+		.ptr.native = (const char __user *const  __user *)&arg,
 	};
 
 	set_fs(KERNEL_DS);
-	r = copy_strings(argc, argv, bprm);
+	r = copy_strings(1, argv, bprm);
 	set_fs(oldfs);
 
 	return r;
 }
-EXPORT_SYMBOL(copy_strings_kernel);
+EXPORT_SYMBOL(copy_string_kernel);
 
 #ifdef CONFIG_MMU
 
@@ -1863,7 +1862,7 @@ static int __do_execve_file(int fd, struct filename *filename,
 	if (retval < 0)
 		goto out;
 
-	retval = copy_strings_kernel(1, &bprm->filename, bprm);
+	retval = copy_string_kernel(bprm->filename, bprm);
 	if (retval < 0)
 		goto out;
 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a345d9fed3d8..3d3afe094c97 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -144,8 +144,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 extern int transfer_args_to_stack(struct linux_binprm *bprm,
 				  unsigned long *sp_location);
 extern int bprm_change_interp(const char *interp, struct linux_binprm *bprm);
-extern int copy_strings_kernel(int argc, const char *const *argv,
-			       struct linux_binprm *bprm);
+int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
 extern void install_exec_creds(struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 8/8] exec: open code copy_string_kernel
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
                   ` (6 preceding siblings ...)
  2020-04-14  7:01 ` [PATCH 7/8] exec: simplify the copy_strings_kernel calling convention Christoph Hellwig
@ 2020-04-14  7:01 ` Christoph Hellwig
  2020-04-18  8:15   ` Christophe Leroy
  2020-04-17 22:41 ` remove set_fs calls from the exec and coredump code v2 Eric W. Biederman
  8 siblings, 1 reply; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-14  7:01 UTC (permalink / raw)
  To: Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

Currently copy_string_kernel is just a wrapper around copy_strings that
simplifies the calling conventions and uses set_fs to allow passing a
kernel pointer.  But due to the fact the we only need to handle a single
kernel argument pointer, the logic can be sigificantly simplified while
getting rid of the set_fs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exec.c | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index b2a77d5acede..ea90af1fb236 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -592,17 +592,42 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
  */
 int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
 {
-	int r;
-	mm_segment_t oldfs = get_fs();
-	struct user_arg_ptr argv = {
-		.ptr.native = (const char __user *const  __user *)&arg,
-	};
+	int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
+	unsigned long pos = bprm->p;
+
+	if (len == 0)
+		return -EFAULT;
+	if (!valid_arg_len(bprm, len))
+		return -E2BIG;
+
+	/* We're going to work our way backwards. */
+	arg += len;
+	bprm->p -= len;
+	if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
+		return -E2BIG;
+
+	while (len > 0) {
+		unsigned int bytes_to_copy = min_t(unsigned int, len,
+				min_not_zero(offset_in_page(pos), PAGE_SIZE));
+		struct page *page;
+		char *kaddr;
 
-	set_fs(KERNEL_DS);
-	r = copy_strings(1, argv, bprm);
-	set_fs(oldfs);
+		pos -= bytes_to_copy;
+		arg -= bytes_to_copy;
+		len -= bytes_to_copy;
 
-	return r;
+		page = get_arg_page(bprm, pos, 1);
+		if (!page)
+			return -E2BIG;
+		kaddr = kmap_atomic(page);
+		flush_arg_page(bprm, pos & PAGE_MASK, page);
+		memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy);
+		flush_kernel_dcache_page(page);
+		kunmap_atomic(kaddr);
+		put_arg_page(page);
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(copy_string_kernel);
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-14  7:01 ` [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer Christoph Hellwig
@ 2020-04-14 13:15   ` Arnd Bergmann
  2020-04-15  7:45     ` Christoph Hellwig
  2020-04-15  3:01   ` Michael Ellerman
  1 sibling, 1 reply; 34+ messages in thread
From: Arnd Bergmann @ 2020-04-14 13:15 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Jeremy Kerr, Eric W . Biederman,
	linuxppc-dev, Linux FS-devel Mailing List, linux-kernel

On Tue, Apr 14, 2020 at 9:02 AM Christoph Hellwig <hch@lst.de> wrote:
>
> Instead of messing with the address limit just open code the trivial
> memcpy + memset logic for the native version, and a call to
> to_compat_siginfo for the compat version.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Nice!

>   */
>  #define user_long_t            compat_long_t
>  #define user_siginfo_t         compat_siginfo_t
> -#define copy_siginfo_to_user   copy_siginfo_to_user32
> +#define fill_siginfo_note(note, csigdata, siginfo)             \
> +do {                                                                   \
> +       to_compat_siginfo(csigdata, siginfo, compat_siginfo_flags());   \
> +       fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); \
> +} while (0)

I don't think you are changing the behavior here, but I still wonder if it
is in fact correct for x32: is in_x32_syscall() true here when dumping an
x32 compat elf process, or should this rather be set according to which
binfmt_elf copy is being used?

     Arnd

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 3/8] signal: replace __copy_siginfo_to_user32 with to_compat_siginfo
  2020-04-14  7:01 ` [PATCH 3/8] signal: replace __copy_siginfo_to_user32 with to_compat_siginfo Christoph Hellwig
@ 2020-04-14 14:00   ` Arnd Bergmann
  0 siblings, 0 replies; 34+ messages in thread
From: Arnd Bergmann @ 2020-04-14 14:00 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Jeremy Kerr, Eric W . Biederman,
	linuxppc-dev, Linux FS-devel Mailing List, linux-kernel

On Tue, Apr 14, 2020 at 9:01 AM Christoph Hellwig <hch@lst.de> wrote:
>
> Move copying the siginfo to userspace into the callers, so that the
> compat_siginfo conversion can be reused by the ELF coredump code without
> set_fs magic.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks all good to me, but I noticed that the naming is now a bit
inconsistent. to_compat_siginfo() is basically the reverse of
post_copy_siginfo_from_user32(), but the names are very different.

I suppose this can always be cleaned up later though, as your
naming choice is more consistent with how things are in the
rest of the kernel these days.

    Arnd

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-14  7:01 ` [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer Christoph Hellwig
  2020-04-14 13:15   ` Arnd Bergmann
@ 2020-04-15  3:01   ` Michael Ellerman
  2020-04-15  6:19     ` Christoph Hellwig
  1 sibling, 1 reply; 34+ messages in thread
From: Michael Ellerman @ 2020-04-15  3:01 UTC (permalink / raw)
  To: Christoph Hellwig, Andrew Morton, Alexander Viro
  Cc: Jeremy Kerr, Arnd Bergmann, Eric W . Biederman, linuxppc-dev,
	linux-fsdevel, linux-kernel

Christoph Hellwig <hch@lst.de> writes:

> Instead of messing with the address limit just open code the trivial
> memcpy + memset logic for the native version, and a call to
> to_compat_siginfo for the compat version.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/binfmt_elf.c        | 9 +++++----
>  fs/compat_binfmt_elf.c | 6 +++++-
>  2 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 13f25e241ac4..607c5a5f855e 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -1553,15 +1553,16 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
>  	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
>  }
>  
> +#ifndef fill_siginfo_note
>  static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
>  		const kernel_siginfo_t *siginfo)
>  {
> -	mm_segment_t old_fs = get_fs();
> -	set_fs(KERNEL_DS);
> -	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
> -	set_fs(old_fs);
> +	memcpy(csigdata, siginfo, sizeof(struct kernel_siginfo));
> +	memset((char *)csigdata + sizeof(struct kernel_siginfo), 0,
> +		SI_EXPANSION_SIZE);
>  	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
>  }
> +#endif
>  
>  #define MAX_FILE_NOTE_SIZE (4*1024*1024)
>  /*
> diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
> index aaad4ca1217e..ab84e095618b 100644
> --- a/fs/compat_binfmt_elf.c
> +++ b/fs/compat_binfmt_elf.c
> @@ -39,7 +39,11 @@
>   */
>  #define user_long_t		compat_long_t
>  #define user_siginfo_t		compat_siginfo_t
> -#define copy_siginfo_to_user	copy_siginfo_to_user32
> +#define fill_siginfo_note(note, csigdata, siginfo)		\
> +do {									\
> +	to_compat_siginfo(csigdata, siginfo, compat_siginfo_flags());	\
> +	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); \
> +} while (0)

This doesn't build on ppc (cell_defconfig):

  ../fs/binfmt_elf.c: In function 'fill_note_info':
  ../fs/compat_binfmt_elf.c:44:39: error: implicit declaration of function 'compat_siginfo_flags'; did you mean 'to_compat_siginfo'? [-Werror=implicit-function-d
  eclaration]
    to_compat_siginfo(csigdata, siginfo, compat_siginfo_flags()); \
                                         ^~~~~~~~~~~~~~~~~~~~
  ../fs/binfmt_elf.c:1846:2: note: in expansion of macro 'fill_siginfo_note'
    fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
    ^~~~~~~~~~~~~~~~~
  cc1: some warnings being treated as errors
  make[2]: *** [../scripts/Makefile.build:266: fs/compat_binfmt_elf.o] Error 1


I guess the empty version from kernel/signal.c needs to move into a
header somewhere.

cheers


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-15  3:01   ` Michael Ellerman
@ 2020-04-15  6:19     ` Christoph Hellwig
  0 siblings, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-15  6:19 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Christoph Hellwig, Andrew Morton, Alexander Viro, Jeremy Kerr,
	Arnd Bergmann, Eric W . Biederman, linuxppc-dev, linux-fsdevel,
	linux-kernel

On Wed, Apr 15, 2020 at 01:01:59PM +1000, Michael Ellerman wrote:
> > +	to_compat_siginfo(csigdata, siginfo, compat_siginfo_flags());	\
> > +	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); \
> > +} while (0)
> 
> This doesn't build on ppc (cell_defconfig):
> 
>   ../fs/binfmt_elf.c: In function 'fill_note_info':
>   ../fs/compat_binfmt_elf.c:44:39: error: implicit declaration of function 'compat_siginfo_flags'; did you mean 'to_compat_siginfo'? [-Werror=implicit-function-d
>   eclaration]
>     to_compat_siginfo(csigdata, siginfo, compat_siginfo_flags()); \
>                                          ^~~~~~~~~~~~~~~~~~~~
>   ../fs/binfmt_elf.c:1846:2: note: in expansion of macro 'fill_siginfo_note'
>     fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
>     ^~~~~~~~~~~~~~~~~
>   cc1: some warnings being treated as errors
>   make[2]: *** [../scripts/Makefile.build:266: fs/compat_binfmt_elf.o] Error 1
> 
> 
> I guess the empty version from kernel/signal.c needs to move into a
> header somewhere.

Yes, it should.  Odd that the buildbut hasn't complained yet so far..

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-14 13:15   ` Arnd Bergmann
@ 2020-04-15  7:45     ` Christoph Hellwig
  2020-04-15  8:20       ` Arnd Bergmann
  0 siblings, 1 reply; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-15  7:45 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Christoph Hellwig, Andrew Morton, Alexander Viro, Jeremy Kerr,
	Eric W . Biederman, linuxppc-dev, Linux FS-devel Mailing List,
	linux-kernel

On Tue, Apr 14, 2020 at 03:15:09PM +0200, Arnd Bergmann wrote:
> I don't think you are changing the behavior here, but I still wonder if it
> is in fact correct for x32: is in_x32_syscall() true here when dumping an
> x32 compat elf process, or should this rather be set according to which
> binfmt_elf copy is being used?

The infrastructure cold enable that, although it would require more
arch hooks I think.  I'd rather keep it out of this series and to
an interested party.  Then again x32 doesn't seem to have a whole lot
of interested parties..

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-15  7:45     ` Christoph Hellwig
@ 2020-04-15  8:20       ` Arnd Bergmann
  2020-04-17 13:27         ` Christoph Hellwig
  0 siblings, 1 reply; 34+ messages in thread
From: Arnd Bergmann @ 2020-04-15  8:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Jeremy Kerr, Eric W . Biederman,
	linuxppc-dev, Linux FS-devel Mailing List, linux-kernel

On Wed, Apr 15, 2020 at 9:45 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Tue, Apr 14, 2020 at 03:15:09PM +0200, Arnd Bergmann wrote:
> > I don't think you are changing the behavior here, but I still wonder if it
> > is in fact correct for x32: is in_x32_syscall() true here when dumping an
> > x32 compat elf process, or should this rather be set according to which
> > binfmt_elf copy is being used?
>
> The infrastructure could enable that, although it would require more
> arch hooks I think.

I was more interested in whether you can tell if it's currently broken
or not. If my feeling is right that the current code does the wrong thing
here, it would be good to at least put a FIXME comment in there.

> I'd rather keep it out of this series and to
> an interested party.  Then again x32 doesn't seem to have a whole lot
> of interested parties..

Fine with me. It's on my mental list of things that we want to kill off
eventually as soon as the remaining users stop replying to questions
about it.

In fact I should really turn that into a properly maintained list in
Documentation/... that contains any options that someone has
asked about removing in the past, along with the reasons for keeping
it around and a time at which we should ask about it again.

      Arnd

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-15  8:20       ` Arnd Bergmann
@ 2020-04-17 13:27         ` Christoph Hellwig
  2020-04-17 18:10           ` Eric W. Biederman
  0 siblings, 1 reply; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-17 13:27 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Christoph Hellwig, Andrew Morton, Alexander Viro, Jeremy Kerr,
	Eric W . Biederman, linuxppc-dev, Linux FS-devel Mailing List,
	linux-kernel, x86



On Wed, Apr 15, 2020 at 10:20:11AM +0200, Arnd Bergmann wrote:
> > I'd rather keep it out of this series and to
> > an interested party.  Then again x32 doesn't seem to have a whole lot
> > of interested parties..
> 
> Fine with me. It's on my mental list of things that we want to kill off
> eventually as soon as the remaining users stop replying to questions
> about it.
> 
> In fact I should really turn that into a properly maintained list in
> Documentation/... that contains any options that someone has
> asked about removing in the past, along with the reasons for keeping
> it around and a time at which we should ask about it again.

To the newly added x86 maintainers:  Arnd brought up the point that
elf_core_dump writes the ABI siginfo format into the core dump. That
format differs for i386 vs x32.  Is there any good way to find out
which is the right format when are not in a syscall?

As far a I can tell x32 vs i386 just seems to be based around what
syscall table was used for the current syscall, but core dumps aren't
always in syscall context.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-17 13:27         ` Christoph Hellwig
@ 2020-04-17 18:10           ` Eric W. Biederman
  2020-04-17 20:06             ` Arnd Bergmann
  0 siblings, 1 reply; 34+ messages in thread
From: Eric W. Biederman @ 2020-04-17 18:10 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Arnd Bergmann, Andrew Morton, Alexander Viro, Jeremy Kerr,
	linuxppc-dev, Linux FS-devel Mailing List, linux-kernel, x86

Christoph Hellwig <hch@lst.de> writes:

> On Wed, Apr 15, 2020 at 10:20:11AM +0200, Arnd Bergmann wrote:
>> > I'd rather keep it out of this series and to
>> > an interested party.  Then again x32 doesn't seem to have a whole lot
>> > of interested parties..
>> 
>> Fine with me. It's on my mental list of things that we want to kill off
>> eventually as soon as the remaining users stop replying to questions
>> about it.
>> 
>> In fact I should really turn that into a properly maintained list in
>> Documentation/... that contains any options that someone has
>> asked about removing in the past, along with the reasons for keeping
>> it around and a time at which we should ask about it again.
>
> To the newly added x86 maintainers:  Arnd brought up the point that
> elf_core_dump writes the ABI siginfo format into the core dump. That
> format differs for i386 vs x32.  Is there any good way to find out
> which is the right format when are not in a syscall?
>
> As far a I can tell x32 vs i386 just seems to be based around what
> syscall table was used for the current syscall, but core dumps aren't
> always in syscall context.

I don't think this matters.  The i386 and x32 signal structures
only differ for SIGCHLD.  The SIGCHLD signal does cause coredumps.
So as long as we get the 32bit vs 64bit distinct correct all should be
well.

Eric




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer
  2020-04-17 18:10           ` Eric W. Biederman
@ 2020-04-17 20:06             ` Arnd Bergmann
  0 siblings, 0 replies; 34+ messages in thread
From: Arnd Bergmann @ 2020-04-17 20:06 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Christoph Hellwig, Andrew Morton, Alexander Viro, Jeremy Kerr,
	linuxppc-dev, Linux FS-devel Mailing List, linux-kernel,
	the arch/x86 maintainers

On Fri, Apr 17, 2020 at 8:13 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> Christoph Hellwig <hch@lst.de> writes:
>
> > On Wed, Apr 15, 2020 at 10:20:11AM +0200, Arnd Bergmann wrote:
> >> > I'd rather keep it out of this series and to
> >> > an interested party.  Then again x32 doesn't seem to have a whole lot
> >> > of interested parties..
> >>
> >> Fine with me. It's on my mental list of things that we want to kill off
> >> eventually as soon as the remaining users stop replying to questions
> >> about it.
> >>
> >> In fact I should really turn that into a properly maintained list in
> >> Documentation/... that contains any options that someone has
> >> asked about removing in the past, along with the reasons for keeping
> >> it around and a time at which we should ask about it again.
> >
> > To the newly added x86 maintainers:  Arnd brought up the point that
> > elf_core_dump writes the ABI siginfo format into the core dump. That
> > format differs for i386 vs x32.  Is there any good way to find out
> > which is the right format when are not in a syscall?
> >
> > As far a I can tell x32 vs i386 just seems to be based around what
> > syscall table was used for the current syscall, but core dumps aren't
> > always in syscall context.
>
> I don't think this matters.  The i386 and x32 signal structures
> only differ for SIGCHLD.  The SIGCHLD signal does cause coredumps.
> So as long as we get the 32bit vs 64bit distinct correct all should be
> well.

Ok, makes sense. Thanks for taking a look into this!

      Arnd

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/8] signal: clean up __copy_siginfo_to_user32
  2020-04-14  7:01 ` [PATCH 2/8] signal: clean up __copy_siginfo_to_user32 Christoph Hellwig
@ 2020-04-17 21:08   ` Eric W. Biederman
  2020-04-17 21:09     ` [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32 Eric W. Biederman
                       ` (2 more replies)
  0 siblings, 3 replies; 34+ messages in thread
From: Eric W. Biederman @ 2020-04-17 21:08 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Jeremy Kerr, Arnd Bergmann,
	linuxppc-dev, linux-fsdevel, linux-kernel, x86

Christoph Hellwig <hch@lst.de> writes:

> Instead of an architecture specific calling convention in common code
> just pass a flags argument with architecture specific values.

This bothers me because it makes all architectures pay for the sins of
x32.  Further it starts burying the details of the what is happening in
architecture specific helpers.  Hiding the fact that there is only
one niche architecture that does anything weird.

I am very sensitive to hiding away signal handling details right now
because way to much of the signal handling code got hidden in
architecture specific files and was quite buggy because as a result.

My general sense is putting all of the weird details up front and center
in kernel/signal.c is the only way for this code will be looked at
and successfully maintained.

How about these patches to solve set_fs with binfmt_elf instead:

Eric W. Biederman (2):
      signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32
      signal: Remove the set_fs in binfmt_elf.c:fill_siginfo_note

 fs/binfmt_elf.c        |   5 +----
 fs/compat_binfmt_elf.c |   2 +-
 include/linux/compat.h |   1 +
 include/linux/signal.h |   7 +++++++
 kernel/signal.c        | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------

Eric

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32
  2020-04-17 21:08   ` Eric W. Biederman
@ 2020-04-17 21:09     ` Eric W. Biederman
  2020-04-18  8:05       ` Christophe Leroy
  2020-04-17 21:09     ` [PATCH 2/2] signal: Remove the set_fs in binfmt_elf.c:fill_siginfo_note Eric W. Biederman
  2020-04-19  8:03     ` [PATCH 2/8] signal: clean up __copy_siginfo_to_user32 Christoph Hellwig
  2 siblings, 1 reply; 34+ messages in thread
From: Eric W. Biederman @ 2020-04-17 21:09 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Jeremy Kerr, Arnd Bergmann,
	linuxppc-dev, linux-fsdevel, linux-kernel, x86


To remove the use of set_fs in the coredump code there needs to be a
way to convert a kernel siginfo to a userspace compat siginfo.

Call that function copy_siginfo_to_compat and factor it out of
copy_siginfo_to_user32.

The existence of x32 complicates this code.  On x32 SIGCHLD uses 64bit
times for utime and stime.  As only SIGCHLD is affected and SIGCHLD
never causes a coredump I have avoided handling that case.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/compat.h |   1 +
 kernel/signal.c        | 108 +++++++++++++++++++++++------------------
 2 files changed, 63 insertions(+), 46 deletions(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0480ba4db592..4962b254e550 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -402,6 +402,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 		       unsigned long bitmap_size);
 long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size);
+void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from);
 int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from);
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
diff --git a/kernel/signal.c b/kernel/signal.c
index e58a6c619824..578f196898cb 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3235,90 +3235,106 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
 }
 
 #ifdef CONFIG_COMPAT
-int copy_siginfo_to_user32(struct compat_siginfo __user *to,
-			   const struct kernel_siginfo *from)
-#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
+void copy_siginfo_to_external32(struct compat_siginfo *to,
+				const struct kernel_siginfo *from)
 {
-	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
-}
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
-			     const struct kernel_siginfo *from, bool x32_ABI)
-#endif
-{
-	struct compat_siginfo new;
-	memset(&new, 0, sizeof(new));
+	/*
+	 * This function does not work properly for SIGCHLD on x32,
+	 * but it does not need to as SIGCHLD never causes a coredump.
+	 */
+	memset(to, 0, sizeof(*to));
 
-	new.si_signo = from->si_signo;
-	new.si_errno = from->si_errno;
-	new.si_code  = from->si_code;
+	to->si_signo = from->si_signo;
+	to->si_errno = from->si_errno;
+	to->si_code  = from->si_code;
 	switch(siginfo_layout(from->si_signo, from->si_code)) {
 	case SIL_KILL:
-		new.si_pid = from->si_pid;
-		new.si_uid = from->si_uid;
+		to->si_pid = from->si_pid;
+		to->si_uid = from->si_uid;
 		break;
 	case SIL_TIMER:
-		new.si_tid     = from->si_tid;
-		new.si_overrun = from->si_overrun;
-		new.si_int     = from->si_int;
+		to->si_tid     = from->si_tid;
+		to->si_overrun = from->si_overrun;
+		to->si_int     = from->si_int;
 		break;
 	case SIL_POLL:
-		new.si_band = from->si_band;
-		new.si_fd   = from->si_fd;
+		to->si_band = from->si_band;
+		to->si_fd   = from->si_fd;
 		break;
 	case SIL_FAULT:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
 		break;
 	case SIL_FAULT_MCEERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_addr_lsb = from->si_addr_lsb;
+		to->si_addr_lsb = from->si_addr_lsb;
 		break;
 	case SIL_FAULT_BNDERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_lower = ptr_to_compat(from->si_lower);
-		new.si_upper = ptr_to_compat(from->si_upper);
+		to->si_lower = ptr_to_compat(from->si_lower);
+		to->si_upper = ptr_to_compat(from->si_upper);
 		break;
 	case SIL_FAULT_PKUERR:
-		new.si_addr = ptr_to_compat(from->si_addr);
+		to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-		new.si_trapno = from->si_trapno;
+		to->si_trapno = from->si_trapno;
 #endif
-		new.si_pkey = from->si_pkey;
+		to->si_pkey = from->si_pkey;
 		break;
 	case SIL_CHLD:
-		new.si_pid    = from->si_pid;
-		new.si_uid    = from->si_uid;
-		new.si_status = from->si_status;
+		to->si_pid    = from->si_pid;
+		to->si_uid    = from->si_uid;
+		to->si_status = from->si_status;
+		to->si_utime = from->si_utime;
+		to->si_stime = from->si_stime;
 #ifdef CONFIG_X86_X32_ABI
 		if (x32_ABI) {
-			new._sifields._sigchld_x32._utime = from->si_utime;
-			new._sifields._sigchld_x32._stime = from->si_stime;
+			to->_sifields._sigchld_x32._utime = from->si_utime;
+			to->_sifields._sigchld_x32._stime = from->si_stime;
 		} else
 #endif
 		{
-			new.si_utime = from->si_utime;
-			new.si_stime = from->si_stime;
 		}
 		break;
 	case SIL_RT:
-		new.si_pid = from->si_pid;
-		new.si_uid = from->si_uid;
-		new.si_int = from->si_int;
+		to->si_pid = from->si_pid;
+		to->si_uid = from->si_uid;
+		to->si_int = from->si_int;
 		break;
 	case SIL_SYS:
-		new.si_call_addr = ptr_to_compat(from->si_call_addr);
-		new.si_syscall   = from->si_syscall;
-		new.si_arch      = from->si_arch;
+		to->si_call_addr = ptr_to_compat(from->si_call_addr);
+		to->si_syscall   = from->si_syscall;
+		to->si_arch      = from->si_arch;
 		break;
 	}
+}
+
+int copy_siginfo_to_user32(struct compat_siginfo __user *to,
+			   const struct kernel_siginfo *from)
+#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
+{
+	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
+}
+int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
+			     const struct kernel_siginfo *from, bool x32_ABI)
+#endif
+{
+	struct compat_siginfo new;
+	copy_siginfo_to_external32(&new, from);
+#ifdef CONFIG_X86_X32_ABI
+	if (x32_ABI && from->si_signo == SIGCHLD) {
+		new._sifields._sigchld_x32._utime = from->si_utime;
+		new._sifields._sigchld_x32._stime = from->si_stime;
+	}
+#endif
 
 	if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
 		return -EFAULT;
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 2/2] signal: Remove the set_fs in binfmt_elf.c:fill_siginfo_note
  2020-04-17 21:08   ` Eric W. Biederman
  2020-04-17 21:09     ` [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32 Eric W. Biederman
@ 2020-04-17 21:09     ` Eric W. Biederman
  2020-04-19  8:03     ` [PATCH 2/8] signal: clean up __copy_siginfo_to_user32 Christoph Hellwig
  2 siblings, 0 replies; 34+ messages in thread
From: Eric W. Biederman @ 2020-04-17 21:09 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Jeremy Kerr, Arnd Bergmann,
	linuxppc-dev, linux-fsdevel, linux-kernel, x86


The code in binfmt_elf.c is differnt from the rest of the code that
processes siginfo, as it sends siginfo from a kernel buffer to a file
rather than from kernel memory to userspace buffers.  To remove it's
use of set_fs the code needs some different siginfo helpers.

Add the helper copy_siginfo_to_external to copy from the kernel's
internal siginfo layout to a buffer in the siginfo layout that
userspace expects.

Modify fill_siginfo_note to use copy_siginfo_to_external instead of
set_fs and copy_siginfo_to_user.

Update compat_binfmt_elf.c to use the previously added
copy_siginfo_to_external32 to handle the compat case.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/binfmt_elf.c        | 5 +----
 fs/compat_binfmt_elf.c | 2 +-
 include/linux/signal.h | 7 +++++++
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 13f25e241ac4..a1f57e20c3cf 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1556,10 +1556,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
 		const kernel_siginfo_t *siginfo)
 {
-	mm_segment_t old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
-	set_fs(old_fs);
+	copy_siginfo_to_external(csigdata, siginfo);
 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
 }
 
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index aaad4ca1217e..fa0e24e1b726 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -39,7 +39,7 @@
  */
 #define user_long_t		compat_long_t
 #define user_siginfo_t		compat_siginfo_t
-#define copy_siginfo_to_user	copy_siginfo_to_user32
+#define copy_siginfo_to_external	copy_siginfo_to_external32
 
 /*
  * The machine-dependent core note format types are defined in elfcore-compat.h,
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 05bacd2ab135..c1796321cadb 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -24,6 +24,13 @@ static inline void clear_siginfo(kernel_siginfo_t *info)
 
 #define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct kernel_siginfo))
 
+static inline void copy_siginfo_to_external(siginfo_t *to,
+					    const kernel_siginfo_t *from)
+{
+	memcpy(to, from, sizeof(*from));
+	memset(((char *)to) + sizeof(struct kernel_siginfo), 0, SI_EXPANSION_SIZE);
+}
+
 int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from);
 int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from);
 
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: remove set_fs calls from the exec and coredump code v2
  2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
                   ` (7 preceding siblings ...)
  2020-04-14  7:01 ` [PATCH 8/8] exec: open code copy_string_kernel Christoph Hellwig
@ 2020-04-17 22:41 ` Eric W. Biederman
  2020-04-19  8:19   ` Christoph Hellwig
  8 siblings, 1 reply; 34+ messages in thread
From: Eric W. Biederman @ 2020-04-17 22:41 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Jeremy Kerr, Arnd Bergmann,
	linuxppc-dev, linux-fsdevel, linux-kernel

Christoph Hellwig <hch@lst.de> writes:

> Hi all,
>
> this series gets rid of playing with the address limit in the exec and
> coredump code.  Most of this was fairly trivial, the biggest changes are
> those to the spufs coredump code.
>
> Changes since v1:
>  - properly spell NUL
>  - properly handle the compat siginfo case in ELF coredumps

Quick question is exec from a kernel thread within the scope of what you
are looking at?

There is a set_fs(USER_DS) in flush_old_exec whose sole purpose appears
to be to allow exec from kernel threads.  Where the kernel threads
run with set_fs(KERNEL_DS) until they call exec.

Eric

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32
  2020-04-17 21:09     ` [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32 Eric W. Biederman
@ 2020-04-18  8:05       ` Christophe Leroy
  2020-04-18 11:55         ` Eric W. Biederman
  2020-04-19  8:05         ` Christoph Hellwig
  0 siblings, 2 replies; 34+ messages in thread
From: Christophe Leroy @ 2020-04-18  8:05 UTC (permalink / raw)
  To: Eric W. Biederman, Christoph Hellwig
  Cc: Arnd Bergmann, x86, linux-kernel, Alexander Viro, linux-fsdevel,
	Andrew Morton, linuxppc-dev, Jeremy Kerr



Le 17/04/2020 à 23:09, Eric W. Biederman a écrit :
> 
> To remove the use of set_fs in the coredump code there needs to be a
> way to convert a kernel siginfo to a userspace compat siginfo.
> 
> Call that function copy_siginfo_to_compat and factor it out of
> copy_siginfo_to_user32.

I find it a pitty to do that.

The existing function could have been easily converted to using 
user_access_begin() + user_access_end() and use unsafe_put_user() to 
copy to userspace to avoid copying through a temporary structure on the 
stack.

With your change, it becomes impossible to do that.

Is that really an issue to use that set_fs() in the coredump code ?

Christophe

> 
> The existence of x32 complicates this code.  On x32 SIGCHLD uses 64bit
> times for utime and stime.  As only SIGCHLD is affected and SIGCHLD
> never causes a coredump I have avoided handling that case.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>   include/linux/compat.h |   1 +
>   kernel/signal.c        | 108 +++++++++++++++++++++++------------------
>   2 files changed, 63 insertions(+), 46 deletions(-)
> 
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index 0480ba4db592..4962b254e550 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -402,6 +402,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
>   		       unsigned long bitmap_size);
>   long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
>   		       unsigned long bitmap_size);
> +void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from);
>   int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from);
>   int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from);
>   int get_compat_sigevent(struct sigevent *event,
> diff --git a/kernel/signal.c b/kernel/signal.c
> index e58a6c619824..578f196898cb 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -3235,90 +3235,106 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
>   }
>   
>   #ifdef CONFIG_COMPAT
> -int copy_siginfo_to_user32(struct compat_siginfo __user *to,
> -			   const struct kernel_siginfo *from)
> -#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
> +void copy_siginfo_to_external32(struct compat_siginfo *to,
> +				const struct kernel_siginfo *from)
>   {
> -	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
> -}
> -int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
> -			     const struct kernel_siginfo *from, bool x32_ABI)
> -#endif
> -{
> -	struct compat_siginfo new;
> -	memset(&new, 0, sizeof(new));
> +	/*
> +	 * This function does not work properly for SIGCHLD on x32,
> +	 * but it does not need to as SIGCHLD never causes a coredump.
> +	 */
> +	memset(to, 0, sizeof(*to));
>   
> -	new.si_signo = from->si_signo;
> -	new.si_errno = from->si_errno;
> -	new.si_code  = from->si_code;
> +	to->si_signo = from->si_signo;
> +	to->si_errno = from->si_errno;
> +	to->si_code  = from->si_code;
>   	switch(siginfo_layout(from->si_signo, from->si_code)) {
>   	case SIL_KILL:
> -		new.si_pid = from->si_pid;
> -		new.si_uid = from->si_uid;
> +		to->si_pid = from->si_pid;
> +		to->si_uid = from->si_uid;
>   		break;
>   	case SIL_TIMER:
> -		new.si_tid     = from->si_tid;
> -		new.si_overrun = from->si_overrun;
> -		new.si_int     = from->si_int;
> +		to->si_tid     = from->si_tid;
> +		to->si_overrun = from->si_overrun;
> +		to->si_int     = from->si_int;
>   		break;
>   	case SIL_POLL:
> -		new.si_band = from->si_band;
> -		new.si_fd   = from->si_fd;
> +		to->si_band = from->si_band;
> +		to->si_fd   = from->si_fd;
>   		break;
>   	case SIL_FAULT:
> -		new.si_addr = ptr_to_compat(from->si_addr);
> +		to->si_addr = ptr_to_compat(from->si_addr);
>   #ifdef __ARCH_SI_TRAPNO
> -		new.si_trapno = from->si_trapno;
> +		to->si_trapno = from->si_trapno;
>   #endif
>   		break;
>   	case SIL_FAULT_MCEERR:
> -		new.si_addr = ptr_to_compat(from->si_addr);
> +		to->si_addr = ptr_to_compat(from->si_addr);
>   #ifdef __ARCH_SI_TRAPNO
> -		new.si_trapno = from->si_trapno;
> +		to->si_trapno = from->si_trapno;
>   #endif
> -		new.si_addr_lsb = from->si_addr_lsb;
> +		to->si_addr_lsb = from->si_addr_lsb;
>   		break;
>   	case SIL_FAULT_BNDERR:
> -		new.si_addr = ptr_to_compat(from->si_addr);
> +		to->si_addr = ptr_to_compat(from->si_addr);
>   #ifdef __ARCH_SI_TRAPNO
> -		new.si_trapno = from->si_trapno;
> +		to->si_trapno = from->si_trapno;
>   #endif
> -		new.si_lower = ptr_to_compat(from->si_lower);
> -		new.si_upper = ptr_to_compat(from->si_upper);
> +		to->si_lower = ptr_to_compat(from->si_lower);
> +		to->si_upper = ptr_to_compat(from->si_upper);
>   		break;
>   	case SIL_FAULT_PKUERR:
> -		new.si_addr = ptr_to_compat(from->si_addr);
> +		to->si_addr = ptr_to_compat(from->si_addr);
>   #ifdef __ARCH_SI_TRAPNO
> -		new.si_trapno = from->si_trapno;
> +		to->si_trapno = from->si_trapno;
>   #endif
> -		new.si_pkey = from->si_pkey;
> +		to->si_pkey = from->si_pkey;
>   		break;
>   	case SIL_CHLD:
> -		new.si_pid    = from->si_pid;
> -		new.si_uid    = from->si_uid;
> -		new.si_status = from->si_status;
> +		to->si_pid    = from->si_pid;
> +		to->si_uid    = from->si_uid;
> +		to->si_status = from->si_status;
> +		to->si_utime = from->si_utime;
> +		to->si_stime = from->si_stime;
>   #ifdef CONFIG_X86_X32_ABI
>   		if (x32_ABI) {
> -			new._sifields._sigchld_x32._utime = from->si_utime;
> -			new._sifields._sigchld_x32._stime = from->si_stime;
> +			to->_sifields._sigchld_x32._utime = from->si_utime;
> +			to->_sifields._sigchld_x32._stime = from->si_stime;
>   		} else
>   #endif
>   		{
> -			new.si_utime = from->si_utime;
> -			new.si_stime = from->si_stime;
>   		}
>   		break;
>   	case SIL_RT:
> -		new.si_pid = from->si_pid;
> -		new.si_uid = from->si_uid;
> -		new.si_int = from->si_int;
> +		to->si_pid = from->si_pid;
> +		to->si_uid = from->si_uid;
> +		to->si_int = from->si_int;
>   		break;
>   	case SIL_SYS:
> -		new.si_call_addr = ptr_to_compat(from->si_call_addr);
> -		new.si_syscall   = from->si_syscall;
> -		new.si_arch      = from->si_arch;
> +		to->si_call_addr = ptr_to_compat(from->si_call_addr);
> +		to->si_syscall   = from->si_syscall;
> +		to->si_arch      = from->si_arch;
>   		break;
>   	}
> +}
> +
> +int copy_siginfo_to_user32(struct compat_siginfo __user *to,
> +			   const struct kernel_siginfo *from)
> +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
> +{
> +	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
> +}
> +int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
> +			     const struct kernel_siginfo *from, bool x32_ABI)
> +#endif
> +{
> +	struct compat_siginfo new;
> +	copy_siginfo_to_external32(&new, from);
> +#ifdef CONFIG_X86_X32_ABI
> +	if (x32_ABI && from->si_signo == SIGCHLD) {
> +		new._sifields._sigchld_x32._utime = from->si_utime;
> +		new._sifields._sigchld_x32._stime = from->si_stime;
> +	}
> +#endif
>   
>   	if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
>   		return -EFAULT;
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 8/8] exec: open code copy_string_kernel
  2020-04-14  7:01 ` [PATCH 8/8] exec: open code copy_string_kernel Christoph Hellwig
@ 2020-04-18  8:15   ` Christophe Leroy
  2020-04-19  8:06     ` Christoph Hellwig
  0 siblings, 1 reply; 34+ messages in thread
From: Christophe Leroy @ 2020-04-18  8:15 UTC (permalink / raw)
  To: Christoph Hellwig, Andrew Morton, Alexander Viro
  Cc: Arnd Bergmann, linux-kernel, Jeremy Kerr, linux-fsdevel,
	linuxppc-dev, Eric W . Biederman



Le 14/04/2020 à 09:01, Christoph Hellwig a écrit :
> Currently copy_string_kernel is just a wrapper around copy_strings that
> simplifies the calling conventions and uses set_fs to allow passing a
> kernel pointer.  But due to the fact the we only need to handle a single
> kernel argument pointer, the logic can be sigificantly simplified while
> getting rid of the set_fs.


Instead of duplicating almost identical code, can you write a function 
that takes whether the source is from user or from kernel, then you just 
do things like:

	if (from_user)
		len = strnlen_user(str, MAX_ARG_STRLEN);
	else
		len = strnlen(str, MAX_ARG_STRLEN);


	if (from_user)
		copy_from_user(kaddr+offset, str, bytes_to_copy);
	else
		memcpy(kaddr+offset, str, bytes_to_copy);

> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/exec.c | 43 ++++++++++++++++++++++++++++++++++---------
>   1 file changed, 34 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index b2a77d5acede..ea90af1fb236 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -592,17 +592,42 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
>    */
>   int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
>   {
> -	int r;
> -	mm_segment_t oldfs = get_fs();
> -	struct user_arg_ptr argv = {
> -		.ptr.native = (const char __user *const  __user *)&arg,
> -	};
> +	int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
> +	unsigned long pos = bprm->p;
> +
> +	if (len == 0)
> +		return -EFAULT;
> +	if (!valid_arg_len(bprm, len))
> +		return -E2BIG;
> +
> +	/* We're going to work our way backwards. */
> +	arg += len;
> +	bprm->p -= len;
> +	if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
> +		return -E2BIG;
> +
> +	while (len > 0) {
> +		unsigned int bytes_to_copy = min_t(unsigned int, len,
> +				min_not_zero(offset_in_page(pos), PAGE_SIZE));
> +		struct page *page;
> +		char *kaddr;
>   
> -	set_fs(KERNEL_DS);
> -	r = copy_strings(1, argv, bprm);
> -	set_fs(oldfs);
> +		pos -= bytes_to_copy;
> +		arg -= bytes_to_copy;
> +		len -= bytes_to_copy;
>   
> -	return r;
> +		page = get_arg_page(bprm, pos, 1);
> +		if (!page)
> +			return -E2BIG;
> +		kaddr = kmap_atomic(page);
> +		flush_arg_page(bprm, pos & PAGE_MASK, page);
> +		memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy);
> +		flush_kernel_dcache_page(page);
> +		kunmap_atomic(kaddr);
> +		put_arg_page(page);
> +	}
> +
> +	return 0;
>   }
>   EXPORT_SYMBOL(copy_string_kernel);
>   
> 

Christophe

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32
  2020-04-18  8:05       ` Christophe Leroy
@ 2020-04-18 11:55         ` Eric W. Biederman
  2020-04-19  8:13           ` Christoph Hellwig
  2020-04-19  9:54           ` Christophe Leroy
  2020-04-19  8:05         ` Christoph Hellwig
  1 sibling, 2 replies; 34+ messages in thread
From: Eric W. Biederman @ 2020-04-18 11:55 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Christoph Hellwig, Arnd Bergmann, x86, linux-kernel,
	Alexander Viro, linux-fsdevel, Andrew Morton, linuxppc-dev,
	Jeremy Kerr

Christophe Leroy <christophe.leroy@c-s.fr> writes:

> Le 17/04/2020 à 23:09, Eric W. Biederman a écrit :
>>
>> To remove the use of set_fs in the coredump code there needs to be a
>> way to convert a kernel siginfo to a userspace compat siginfo.
>>
>> Call that function copy_siginfo_to_compat and factor it out of
>> copy_siginfo_to_user32.
>
> I find it a pitty to do that.
>
> The existing function could have been easily converted to using
> user_access_begin() + user_access_end() and use unsafe_put_user() to copy to
> userspace to avoid copying through a temporary structure on the stack.
>
> With your change, it becomes impossible to do that.

I don't follow.  You don't like temporary structures in the coredump
code or temporary structures in copy_siginfo_to_user32?

A temporary structure in copy_siginfo_to_user is pretty much required
so that it can be zeroed to guarantee we don't pass a structure with
holes to userspace.

The implementation of copy_siginfo_to_user32 used to use the equivalent
of user_access_begin() and user_access_end() and the code was a mess
that was very difficult to reason about.  I recall their being holes
in the structure that were being copied to userspace.

Meanwhile if you are going to set all of the bytes a cache hot temporary
structure is quite cheap.

> Is that really an issue to use that set_fs() in the coredump code ?

Using set_fs() is pretty bad and something that we would like to remove
from the kernel entirely.  The fewer instances of set_fs() we have the
better.

I forget all of the details but set_fs() is both a type violation and an
attack point when people are attacking the kernel.  The existence of
set_fs() requires somethings that should be constants to be variables.
Something about that means that our current code is difficult to protect
from spectre style vulnerabilities.

There was a very good thread about it all in I think 2018 but
unfortunately I can't find it now.

Eric

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 2/8] signal: clean up __copy_siginfo_to_user32
  2020-04-17 21:08   ` Eric W. Biederman
  2020-04-17 21:09     ` [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32 Eric W. Biederman
  2020-04-17 21:09     ` [PATCH 2/2] signal: Remove the set_fs in binfmt_elf.c:fill_siginfo_note Eric W. Biederman
@ 2020-04-19  8:03     ` Christoph Hellwig
  2 siblings, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-19  8:03 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Christoph Hellwig, Andrew Morton, Alexander Viro, Jeremy Kerr,
	Arnd Bergmann, linuxppc-dev, linux-fsdevel, linux-kernel, x86

On Fri, Apr 17, 2020 at 04:08:23PM -0500, Eric W. Biederman wrote:
> This bothers me because it makes all architectures pay for the sins of
> x32.  Further it starts burying the details of the what is happening in
> architecture specific helpers.  Hiding the fact that there is only
> one niche architecture that does anything weird.
> 
> I am very sensitive to hiding away signal handling details right now
> because way to much of the signal handling code got hidden in
> architecture specific files and was quite buggy because as a result.

I much prefer the unconditional flags over the crazy ifdef mess in
the current coe and your version.  Except for that and the rather
strange and confusing copy_siginfo_to_external32 name it pretty much
looks the same.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32
  2020-04-18  8:05       ` Christophe Leroy
  2020-04-18 11:55         ` Eric W. Biederman
@ 2020-04-19  8:05         ` Christoph Hellwig
  1 sibling, 0 replies; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-19  8:05 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Eric W. Biederman, Christoph Hellwig, Arnd Bergmann, x86,
	linux-kernel, Alexander Viro, linux-fsdevel, Andrew Morton,
	linuxppc-dev, Jeremy Kerr

On Sat, Apr 18, 2020 at 10:05:19AM +0200, Christophe Leroy wrote:
>
>
> Le 17/04/2020 à 23:09, Eric W. Biederman a écrit :
>>
>> To remove the use of set_fs in the coredump code there needs to be a
>> way to convert a kernel siginfo to a userspace compat siginfo.
>>
>> Call that function copy_siginfo_to_compat and factor it out of
>> copy_siginfo_to_user32.
>
> I find it a pitty to do that.
>
> The existing function could have been easily converted to using 
> user_access_begin() + user_access_end() and use unsafe_put_user() to copy 
> to userspace to avoid copying through a temporary structure on the stack.
>
> With your change, it becomes impossible to do that.

As Eric said we need a struct to clear all padding.  Note that I
though about converting to unsafe_copy_to_user in my variant as we
can pretty easily do that if pre-filling the structure earlier.  But
I didn't want to throw in such unrelated changes for now - I'll volunteer
to do it later, though.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 8/8] exec: open code copy_string_kernel
  2020-04-18  8:15   ` Christophe Leroy
@ 2020-04-19  8:06     ` Christoph Hellwig
  2020-04-19  9:44       ` Christophe Leroy
  0 siblings, 1 reply; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-19  8:06 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Christoph Hellwig, Andrew Morton, Alexander Viro, Arnd Bergmann,
	linux-kernel, Jeremy Kerr, linux-fsdevel, linuxppc-dev,
	Eric W . Biederman

On Sat, Apr 18, 2020 at 10:15:42AM +0200, Christophe Leroy wrote:
>
>
> Le 14/04/2020 à 09:01, Christoph Hellwig a écrit :
>> Currently copy_string_kernel is just a wrapper around copy_strings that
>> simplifies the calling conventions and uses set_fs to allow passing a
>> kernel pointer.  But due to the fact the we only need to handle a single
>> kernel argument pointer, the logic can be sigificantly simplified while
>> getting rid of the set_fs.
>
>
> Instead of duplicating almost identical code, can you write a function that 
> takes whether the source is from user or from kernel, then you just do 
> things like:
>
> 	if (from_user)
> 		len = strnlen_user(str, MAX_ARG_STRLEN);
> 	else
> 		len = strnlen(str, MAX_ARG_STRLEN);
>
>
> 	if (from_user)
> 		copy_from_user(kaddr+offset, str, bytes_to_copy);
> 	else
> 		memcpy(kaddr+offset, str, bytes_to_copy);

We'll need two different str variables then with and without __user
annotations to keep type safety.  And introduce a branch-y and unreadable
mess in the exec fast path instead of adding a simple and well understood
function for the kernel case that just deals with the much simpler case
of just copying a single arg vector from a kernel address.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32
  2020-04-18 11:55         ` Eric W. Biederman
@ 2020-04-19  8:13           ` Christoph Hellwig
  2020-04-19  9:46             ` Christophe Leroy
  2020-04-19  9:54           ` Christophe Leroy
  1 sibling, 1 reply; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-19  8:13 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Christophe Leroy, Christoph Hellwig, Arnd Bergmann, x86,
	linux-kernel, Alexander Viro, linux-fsdevel, Andrew Morton,
	linuxppc-dev, Jeremy Kerr

On Sat, Apr 18, 2020 at 06:55:56AM -0500, Eric W. Biederman wrote:
> > Is that really an issue to use that set_fs() in the coredump code ?
> 
> Using set_fs() is pretty bad and something that we would like to remove
> from the kernel entirely.  The fewer instances of set_fs() we have the
> better.
> 
> I forget all of the details but set_fs() is both a type violation and an
> attack point when people are attacking the kernel.  The existence of
> set_fs() requires somethings that should be constants to be variables.
> Something about that means that our current code is difficult to protect
> from spectre style vulnerabilities.

Yes, set_fs requires variable based address checking in the uaccess
routines for architectures with a shared address space, or even entirely
different code for architectures with separate kernel and user address
spaces.  My plan is to hopefully kill set_fs in its current form a few
merge windows down the road.  We'll probably still need some form of
it to e.g. mark a thread as kernel thread vs also being able to execute
user code, but it will be much ore limited than before, called from very
few places and actually be a no-op for many architectures.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: remove set_fs calls from the exec and coredump code v2
  2020-04-17 22:41 ` remove set_fs calls from the exec and coredump code v2 Eric W. Biederman
@ 2020-04-19  8:19   ` Christoph Hellwig
  2020-04-19 11:50     ` Eric W. Biederman
  0 siblings, 1 reply; 34+ messages in thread
From: Christoph Hellwig @ 2020-04-19  8:19 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Christoph Hellwig, Andrew Morton, Alexander Viro, Jeremy Kerr,
	Arnd Bergmann, linuxppc-dev, linux-fsdevel, linux-kernel

On Fri, Apr 17, 2020 at 05:41:52PM -0500, Eric W. Biederman wrote:
> > this series gets rid of playing with the address limit in the exec and
> > coredump code.  Most of this was fairly trivial, the biggest changes are
> > those to the spufs coredump code.
> >
> > Changes since v1:
> >  - properly spell NUL
> >  - properly handle the compat siginfo case in ELF coredumps
> 
> Quick question is exec from a kernel thread within the scope of what you
> are looking at?
> 
> There is a set_fs(USER_DS) in flush_old_exec whose sole purpose appears
> to be to allow exec from kernel threads.  Where the kernel threads
> run with set_fs(KERNEL_DS) until they call exec.

This series doesn't really look at that area.  But I don't think exec
from a kernel thread makes any sense, and cleaning up how to set the
initial USER_DS vs KERNEL_DS state is something I'll eventually get to,
it seems like a major mess at the moment.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 8/8] exec: open code copy_string_kernel
  2020-04-19  8:06     ` Christoph Hellwig
@ 2020-04-19  9:44       ` Christophe Leroy
  0 siblings, 0 replies; 34+ messages in thread
From: Christophe Leroy @ 2020-04-19  9:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Arnd Bergmann, linux-kernel,
	Jeremy Kerr, linux-fsdevel, linuxppc-dev, Eric W . Biederman



Le 19/04/2020 à 10:06, Christoph Hellwig a écrit :
> On Sat, Apr 18, 2020 at 10:15:42AM +0200, Christophe Leroy wrote:
>>
>>
>> Le 14/04/2020 à 09:01, Christoph Hellwig a écrit :
>>> Currently copy_string_kernel is just a wrapper around copy_strings that
>>> simplifies the calling conventions and uses set_fs to allow passing a
>>> kernel pointer.  But due to the fact the we only need to handle a single
>>> kernel argument pointer, the logic can be sigificantly simplified while
>>> getting rid of the set_fs.
>>
>>
>> Instead of duplicating almost identical code, can you write a function that
>> takes whether the source is from user or from kernel, then you just do
>> things like:
>>
>> 	if (from_user)
>> 		len = strnlen_user(str, MAX_ARG_STRLEN);
>> 	else
>> 		len = strnlen(str, MAX_ARG_STRLEN);
>>
>>
>> 	if (from_user)
>> 		copy_from_user(kaddr+offset, str, bytes_to_copy);
>> 	else
>> 		memcpy(kaddr+offset, str, bytes_to_copy);
> 
> We'll need two different str variables then with and without __user
> annotations to keep type safety.  And introduce a branch-y and unreadable
> mess in the exec fast path instead of adding a simple and well understood
> function for the kernel case that just deals with the much simpler case
> of just copying a single arg vector from a kernel address.
> 

About the branch, I was expecting GCC to inline and eliminate the unused 
branch.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32
  2020-04-19  8:13           ` Christoph Hellwig
@ 2020-04-19  9:46             ` Christophe Leroy
  0 siblings, 0 replies; 34+ messages in thread
From: Christophe Leroy @ 2020-04-19  9:46 UTC (permalink / raw)
  To: Christoph Hellwig, Eric W. Biederman
  Cc: Arnd Bergmann, x86, linux-kernel, Alexander Viro, linux-fsdevel,
	Andrew Morton, linuxppc-dev, Jeremy Kerr



Le 19/04/2020 à 10:13, Christoph Hellwig a écrit :
> On Sat, Apr 18, 2020 at 06:55:56AM -0500, Eric W. Biederman wrote:
>>> Is that really an issue to use that set_fs() in the coredump code ?
>>
>> Using set_fs() is pretty bad and something that we would like to remove
>> from the kernel entirely.  The fewer instances of set_fs() we have the
>> better.
>>
>> I forget all of the details but set_fs() is both a type violation and an
>> attack point when people are attacking the kernel.  The existence of
>> set_fs() requires somethings that should be constants to be variables.
>> Something about that means that our current code is difficult to protect
>> from spectre style vulnerabilities.
> 
> Yes, set_fs requires variable based address checking in the uaccess
> routines for architectures with a shared address space, or even entirely
> different code for architectures with separate kernel and user address
> spaces.  My plan is to hopefully kill set_fs in its current form a few
> merge windows down the road.  We'll probably still need some form of
> it to e.g. mark a thread as kernel thread vs also being able to execute
> user code, but it will be much ore limited than before, called from very
> few places and actually be a no-op for many architectures.
> 

Oh nice. Some time ago I proposed a patch to change set_fs() to a 
flip/flop flag based logic, see 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/dd2876b808ea38eb7b7f760ecd6ce06096c61fb5.1580295551.git.christophe.leroy@c-s.fr/

But if we manage to get rid of it completely, that's even better.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32
  2020-04-18 11:55         ` Eric W. Biederman
  2020-04-19  8:13           ` Christoph Hellwig
@ 2020-04-19  9:54           ` Christophe Leroy
  1 sibling, 0 replies; 34+ messages in thread
From: Christophe Leroy @ 2020-04-19  9:54 UTC (permalink / raw)
  To: Eric W. Biederman, Christoph Hellwig
  Cc: Arnd Bergmann, x86, linux-kernel, Alexander Viro, linux-fsdevel,
	Andrew Morton, linuxppc-dev, Jeremy Kerr



Le 18/04/2020 à 13:55, Eric W. Biederman a écrit :
> Christophe Leroy <christophe.leroy@c-s.fr> writes:
> 
>> Le 17/04/2020 à 23:09, Eric W. Biederman a écrit :
>>>
>>> To remove the use of set_fs in the coredump code there needs to be a
>>> way to convert a kernel siginfo to a userspace compat siginfo.
>>>
>>> Call that function copy_siginfo_to_compat and factor it out of
>>> copy_siginfo_to_user32.
>>
>> I find it a pitty to do that.
>>
>> The existing function could have been easily converted to using
>> user_access_begin() + user_access_end() and use unsafe_put_user() to copy to
>> userspace to avoid copying through a temporary structure on the stack.
>>
>> With your change, it becomes impossible to do that.
> 
> I don't follow.  You don't like temporary structures in the coredump
> code or temporary structures in copy_siginfo_to_user32?

In copy_siginfo_to_user32()

> 
> A temporary structure in copy_siginfo_to_user is pretty much required
> so that it can be zeroed to guarantee we don't pass a structure with
> holes to userspace.

Why ? We can zeroize the user structure directly, either with 
clear_user() or with some not yet existing unsafe_clear_user() or 
equivalent.

> 
> The implementation of copy_siginfo_to_user32 used to use the equivalent
> of user_access_begin() and user_access_end() and the code was a mess
> that was very difficult to reason about.  I recall their being holes
> in the structure that were being copied to userspace.
> 
> Meanwhile if you are going to set all of the bytes a cache hot temporary
> structure is quite cheap.

But how can we be sure it is cache hot ? As we are using memset() to 
zeroize it, it won't be loaded from memory as it will use dcbz 
instruction, but at some point in time it will get flushed back to 
memory, that's consuming anyway. Unless we invalidate it after the copy, 
but that becomes complex.

Christophe

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: remove set_fs calls from the exec and coredump code v2
  2020-04-19  8:19   ` Christoph Hellwig
@ 2020-04-19 11:50     ` Eric W. Biederman
  0 siblings, 0 replies; 34+ messages in thread
From: Eric W. Biederman @ 2020-04-19 11:50 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Alexander Viro, Jeremy Kerr, Arnd Bergmann,
	linuxppc-dev, linux-fsdevel, linux-kernel

Christoph Hellwig <hch@lst.de> writes:

> On Fri, Apr 17, 2020 at 05:41:52PM -0500, Eric W. Biederman wrote:
>> > this series gets rid of playing with the address limit in the exec and
>> > coredump code.  Most of this was fairly trivial, the biggest changes are
>> > those to the spufs coredump code.
>> >
>> > Changes since v1:
>> >  - properly spell NUL
>> >  - properly handle the compat siginfo case in ELF coredumps
>> 
>> Quick question is exec from a kernel thread within the scope of what you
>> are looking at?
>> 
>> There is a set_fs(USER_DS) in flush_old_exec whose sole purpose appears
>> to be to allow exec from kernel threads.  Where the kernel threads
>> run with set_fs(KERNEL_DS) until they call exec.
>
> This series doesn't really look at that area.  But I don't think exec
> from a kernel thread makes any sense, and cleaning up how to set the
> initial USER_DS vs KERNEL_DS state is something I'll eventually get to,
> it seems like a major mess at the moment.

Fair enough.  I just wanted to make certain that it is on people's radar
that when the kernel exec's init the arguments are read from kernel
memory and the set_fs(USER_DS) in flush_old_exec() that makes that not
work later.

It is subtle and easy to miss.  So I figured I would mention it since
I have been staring at the exec code a lot lately.

Eric

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2020-04-19 11:53 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-14  7:01 remove set_fs calls from the exec and coredump code v2 Christoph Hellwig
2020-04-14  7:01 ` [PATCH 1/8] powerpc/spufs: simplify spufs core dumping Christoph Hellwig
2020-04-14  7:01 ` [PATCH 2/8] signal: clean up __copy_siginfo_to_user32 Christoph Hellwig
2020-04-17 21:08   ` Eric W. Biederman
2020-04-17 21:09     ` [PATCH 1/2] signal: Factor copy_siginfo_to_external32 from copy_siginfo_to_user32 Eric W. Biederman
2020-04-18  8:05       ` Christophe Leroy
2020-04-18 11:55         ` Eric W. Biederman
2020-04-19  8:13           ` Christoph Hellwig
2020-04-19  9:46             ` Christophe Leroy
2020-04-19  9:54           ` Christophe Leroy
2020-04-19  8:05         ` Christoph Hellwig
2020-04-17 21:09     ` [PATCH 2/2] signal: Remove the set_fs in binfmt_elf.c:fill_siginfo_note Eric W. Biederman
2020-04-19  8:03     ` [PATCH 2/8] signal: clean up __copy_siginfo_to_user32 Christoph Hellwig
2020-04-14  7:01 ` [PATCH 3/8] signal: replace __copy_siginfo_to_user32 with to_compat_siginfo Christoph Hellwig
2020-04-14 14:00   ` Arnd Bergmann
2020-04-14  7:01 ` [PATCH 4/8] binfmt_elf: open code copy_siginfo_to_user to kernelspace buffer Christoph Hellwig
2020-04-14 13:15   ` Arnd Bergmann
2020-04-15  7:45     ` Christoph Hellwig
2020-04-15  8:20       ` Arnd Bergmann
2020-04-17 13:27         ` Christoph Hellwig
2020-04-17 18:10           ` Eric W. Biederman
2020-04-17 20:06             ` Arnd Bergmann
2020-04-15  3:01   ` Michael Ellerman
2020-04-15  6:19     ` Christoph Hellwig
2020-04-14  7:01 ` [PATCH 5/8] binfmt_elf: remove the set_fs(KERNEL_DS) in elf_core_dump Christoph Hellwig
2020-04-14  7:01 ` [PATCH 6/8] binfmt_elf_fdpic: remove the set_fs(KERNEL_DS) in elf_fdpic_core_dump Christoph Hellwig
2020-04-14  7:01 ` [PATCH 7/8] exec: simplify the copy_strings_kernel calling convention Christoph Hellwig
2020-04-14  7:01 ` [PATCH 8/8] exec: open code copy_string_kernel Christoph Hellwig
2020-04-18  8:15   ` Christophe Leroy
2020-04-19  8:06     ` Christoph Hellwig
2020-04-19  9:44       ` Christophe Leroy
2020-04-17 22:41 ` remove set_fs calls from the exec and coredump code v2 Eric W. Biederman
2020-04-19  8:19   ` Christoph Hellwig
2020-04-19 11:50     ` Eric W. Biederman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).