linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] proc: revert /proc/*/cmdline rewrite
       [not found] <20190713072855.GB23167@avx2>
@ 2019-07-13  7:32 ` Alexey Dobriyan
  2019-07-13 14:00   ` Alexey Izbyshev
  2019-07-14  4:16   ` Eric W. Biederman
       [not found] ` <CAHk-=wj_mrNnM-q_z95GcNB=Ab4LaUC6Bi6Q-+3Q9u9NC=3iDA@mail.gmail.com>
  1 sibling, 2 replies; 6+ messages in thread
From: Alexey Dobriyan @ 2019-07-13  7:32 UTC (permalink / raw)
  To: akpm; +Cc: izbyshev, oleg, mkubecek, torvalds, shasta, linux-kernel, security

/proc/*/cmdline continues to cause problems:

	https://lkml.org/lkml/2019/4/5/825
	Subject get_mm_cmdline and userspace (Perl) changing argv0

	https://marc.info/?l=linux-kernel&m=156294831308786&w=4
	[PATCH] proc: Fix uninitialized byte read in get_mm_cmdline()

This patch reverts implementation to the last known good state.
Revert

	commit f5b65348fd77839b50e79bc0a5e536832ea52d8d
	proc: fix missing final NUL in get_mm_cmdline() rewrite

	commit 5ab8271899658042fabc5ae7e6a99066a210bc0e
	fs/proc: simplify and clarify get_mm_cmdline() function

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

	Cc lists

 fs/proc/base.c |  198 +++++++++++++++++++++++++++++++++------------------------
 1 file changed, 118 insertions(+), 80 deletions(-)

--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -210,16 +210,24 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 }
 
 static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
-			      size_t count, loff_t *ppos)
+			      size_t _count, loff_t *pos)
 {
+	unsigned long count = _count;
 	unsigned long arg_start, arg_end, env_start, env_end;
-	unsigned long pos, len;
+	unsigned long len1, len2, len;
+	unsigned long p;
 	char *page;
+	ssize_t rv;
+	char c;
 
 	/* Check if process spawned far enough to have cmdline. */
 	if (!mm->env_end)
 		return 0;
 
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
 	spin_lock(&mm->arg_lock);
 	arg_start = mm->arg_start;
 	arg_end = mm->arg_end;
@@ -227,96 +235,126 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
 	env_end = mm->env_end;
 	spin_unlock(&mm->arg_lock);
 
-	if (arg_start >= arg_end)
-		return 0;
+	BUG_ON(arg_start > arg_end);
+	BUG_ON(env_start > env_end);
+
+	len1 = arg_end - arg_start;
+	len2 = env_end - env_start;
 
+	/* Empty ARGV. */
+	if (len1 == 0) {
+		rv = 0;
+		goto out_free_page;
+	}
 	/*
-	 * We have traditionally allowed the user to re-write
-	 * the argument strings and overflow the end result
-	 * into the environment section. But only do that if
-	 * the environment area is contiguous to the arguments.
+	 * Inherently racy -- command line shares address space
+	 * with code and data.
 	 */
-	if (env_start != arg_end || env_start >= env_end)
-		env_start = env_end = arg_end;
-
-	/* .. and limit it to a maximum of one page of slop */
-	if (env_end >= arg_end + PAGE_SIZE)
-		env_end = arg_end + PAGE_SIZE - 1;
-
-	/* We're not going to care if "*ppos" has high bits set */
-	pos = arg_start + *ppos;
-
-	/* .. but we do check the result is in the proper range */
-	if (pos < arg_start || pos >= env_end)
-		return 0;
-
-	/* .. and we never go past env_end */
-	if (env_end - pos < count)
-		count = env_end - pos;
-
-	page = (char *)__get_free_page(GFP_KERNEL);
-	if (!page)
-		return -ENOMEM;
-
-	len = 0;
-	while (count) {
-		int got;
-		size_t size = min_t(size_t, PAGE_SIZE, count);
-		long offset;
+	rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
+	if (rv <= 0)
+		goto out_free_page;
+
+	rv = 0;
+
+	if (c == '\0') {
+		/* Command line (set of strings) occupies whole ARGV. */
+		if (len1 <= *pos)
+			goto out_free_page;
+
+		p = arg_start + *pos;
+		len = len1 - *pos;
+		while (count > 0 && len > 0) {
+			unsigned int _count;
+			int nr_read;
+
+			_count = min3(count, len, PAGE_SIZE);
+			nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
+			if (nr_read < 0)
+				rv = nr_read;
+			if (nr_read <= 0)
+				goto out_free_page;
+
+			if (copy_to_user(buf, page, nr_read)) {
+				rv = -EFAULT;
+				goto out_free_page;
+			}
 
+			p	+= nr_read;
+			len	-= nr_read;
+			buf	+= nr_read;
+			count	-= nr_read;
+			rv	+= nr_read;
+		}
+	} else {
 		/*
-		 * Are we already starting past the official end?
-		 * We always include the last byte that is *supposed*
-		 * to be NUL
+		 * Command line (1 string) occupies ARGV and
+		 * extends into ENVP.
 		 */
-		offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
-
-		got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
-		if (got <= offset)
-			break;
-		got -= offset;
-
-		/* Don't walk past a NUL character once you hit arg_end */
-		if (pos + got >= arg_end) {
-			int n = 0;
-
-			/*
-			 * If we started before 'arg_end' but ended up
-			 * at or after it, we start the NUL character
-			 * check at arg_end-1 (where we expect the normal
-			 * EOF to be).
-			 *
-			 * NOTE! This is smaller than 'got', because
-			 * pos + got >= arg_end
-			 */
-			if (pos < arg_end)
-				n = arg_end - pos - 1;
-
-			/* Cut off at first NUL after 'n' */
-			got = n + strnlen(page+n, offset+got-n);
-			if (got < offset)
-				break;
-			got -= offset;
-
-			/* Include the NUL if it existed */
-			if (got < size)
-				got++;
+		struct {
+			unsigned long p;
+			unsigned long len;
+		} cmdline[2] = {
+			{ .p = arg_start, .len = len1 },
+			{ .p = env_start, .len = len2 },
+		};
+		loff_t pos1 = *pos;
+		unsigned int i;
+
+		i = 0;
+		while (i < 2 && pos1 >= cmdline[i].len) {
+			pos1 -= cmdline[i].len;
+			i++;
 		}
+		while (i < 2) {
+			p = cmdline[i].p + pos1;
+			len = cmdline[i].len - pos1;
+			while (count > 0 && len > 0) {
+				unsigned int _count, l;
+				int nr_read;
+				bool final;
+
+				_count = min3(count, len, PAGE_SIZE);
+				nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
+				if (nr_read < 0)
+					rv = nr_read;
+				if (nr_read <= 0)
+					goto out_free_page;
+
+				/*
+				 * Command line can be shorter than whole ARGV
+				 * even if last "marker" byte says it is not.
+				 */
+				final = false;
+				l = strnlen(page, nr_read);
+				if (l < nr_read) {
+					nr_read = l;
+					final = true;
+				}
+
+				if (copy_to_user(buf, page, nr_read)) {
+					rv = -EFAULT;
+					goto out_free_page;
+				}
+
+				p	+= nr_read;
+				len	-= nr_read;
+				buf	+= nr_read;
+				count	-= nr_read;
+				rv	+= nr_read;
+
+				if (final)
+					goto out_free_page;
+			}
 
-		got -= copy_to_user(buf, page+offset, got);
-		if (unlikely(!got)) {
-			if (!len)
-				len = -EFAULT;
-			break;
+			/* Only first chunk can be read partially. */
+			pos1 = 0;
+			i++;
 		}
-		pos += got;
-		buf += got;
-		len += got;
-		count -= got;
 	}
 
+out_free_page:
 	free_page((unsigned long)page);
-	return len;
+	return rv;
 }
 
 static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] proc: revert /proc/*/cmdline rewrite
  2019-07-13  7:32 ` [PATCH] proc: revert /proc/*/cmdline rewrite Alexey Dobriyan
@ 2019-07-13 14:00   ` Alexey Izbyshev
  2019-07-14  4:16   ` Eric W. Biederman
  1 sibling, 0 replies; 6+ messages in thread
From: Alexey Izbyshev @ 2019-07-13 14:00 UTC (permalink / raw)
  To: Alexey Dobriyan, akpm
  Cc: oleg, mkubecek, torvalds, shasta, linux-kernel, security

On 7/13/19 10:32 AM, Alexey Dobriyan wrote:
> /proc/*/cmdline continues to cause problems:
> 
> 	https://lkml.org/lkml/2019/4/5/825
> 	Subject get_mm_cmdline and userspace (Perl) changing argv0
> 
> 	https://marc.info/?l=linux-kernel&m=156294831308786&w=4
> 	[PATCH] proc: Fix uninitialized byte read in get_mm_cmdline()
> 
> This patch reverts implementation to the last known good state.

I confirm that this revert fixes both issues above on my system (x86_64).

Alexey

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] proc: revert /proc/*/cmdline rewrite
  2019-07-13  7:32 ` [PATCH] proc: revert /proc/*/cmdline rewrite Alexey Dobriyan
  2019-07-13 14:00   ` Alexey Izbyshev
@ 2019-07-14  4:16   ` Eric W. Biederman
  2019-07-14  5:02     ` Linus Torvalds
  1 sibling, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2019-07-14  4:16 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: akpm, izbyshev, oleg, mkubecek, torvalds, shasta, linux-kernel, security

Alexey Dobriyan <adobriyan@gmail.com> writes:

> /proc/*/cmdline continues to cause problems:
>
> 	https://lkml.org/lkml/2019/4/5/825
> 	Subject get_mm_cmdline and userspace (Perl) changing argv0
>
> 	https://marc.info/?l=linux-kernel&m=156294831308786&w=4
> 	[PATCH] proc: Fix uninitialized byte read in get_mm_cmdline()
>
> This patch reverts implementation to the last known good state.
> Revert
>
> 	commit f5b65348fd77839b50e79bc0a5e536832ea52d8d
> 	proc: fix missing final NUL in get_mm_cmdline() rewrite
>
> 	commit 5ab8271899658042fabc5ae7e6a99066a210bc0e
> 	fs/proc: simplify and clarify get_mm_cmdline() function
>
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>

Given that this fixes a regression and a bug.

Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>

> ---
>
> 	Cc lists
>
>  fs/proc/base.c |  198 +++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 118 insertions(+), 80 deletions(-)
>
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -210,16 +210,24 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
>  }
>  
>  static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
> -			      size_t count, loff_t *ppos)
> +			      size_t _count, loff_t *pos)
>  {
> +	unsigned long count = _count;
>  	unsigned long arg_start, arg_end, env_start, env_end;
> -	unsigned long pos, len;
> +	unsigned long len1, len2, len;
> +	unsigned long p;
>  	char *page;
> +	ssize_t rv;
> +	char c;
>  
>  	/* Check if process spawned far enough to have cmdline. */
>  	if (!mm->env_end)
>  		return 0;
>  
> +	page = (char *)__get_free_page(GFP_KERNEL);
> +	if (!page)
> +		return -ENOMEM;
> +
>  	spin_lock(&mm->arg_lock);
>  	arg_start = mm->arg_start;
>  	arg_end = mm->arg_end;
> @@ -227,96 +235,126 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
>  	env_end = mm->env_end;
>  	spin_unlock(&mm->arg_lock);
>  
> -	if (arg_start >= arg_end)
> -		return 0;
> +	BUG_ON(arg_start > arg_end);
> +	BUG_ON(env_start > env_end);
> +
> +	len1 = arg_end - arg_start;
> +	len2 = env_end - env_start;
>  
> +	/* Empty ARGV. */
> +	if (len1 == 0) {
> +		rv = 0;
> +		goto out_free_page;
> +	}
>  	/*
> -	 * We have traditionally allowed the user to re-write
> -	 * the argument strings and overflow the end result
> -	 * into the environment section. But only do that if
> -	 * the environment area is contiguous to the arguments.
> +	 * Inherently racy -- command line shares address space
> +	 * with code and data.
>  	 */
> -	if (env_start != arg_end || env_start >= env_end)
> -		env_start = env_end = arg_end;
> -
> -	/* .. and limit it to a maximum of one page of slop */
> -	if (env_end >= arg_end + PAGE_SIZE)
> -		env_end = arg_end + PAGE_SIZE - 1;
> -
> -	/* We're not going to care if "*ppos" has high bits set */
> -	pos = arg_start + *ppos;
> -
> -	/* .. but we do check the result is in the proper range */
> -	if (pos < arg_start || pos >= env_end)
> -		return 0;
> -
> -	/* .. and we never go past env_end */
> -	if (env_end - pos < count)
> -		count = env_end - pos;
> -
> -	page = (char *)__get_free_page(GFP_KERNEL);
> -	if (!page)
> -		return -ENOMEM;
> -
> -	len = 0;
> -	while (count) {
> -		int got;
> -		size_t size = min_t(size_t, PAGE_SIZE, count);
> -		long offset;
> +	rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON);
> +	if (rv <= 0)
> +		goto out_free_page;
> +
> +	rv = 0;
> +
> +	if (c == '\0') {
> +		/* Command line (set of strings) occupies whole ARGV. */
> +		if (len1 <= *pos)
> +			goto out_free_page;
> +
> +		p = arg_start + *pos;
> +		len = len1 - *pos;
> +		while (count > 0 && len > 0) {
> +			unsigned int _count;
> +			int nr_read;
> +
> +			_count = min3(count, len, PAGE_SIZE);
> +			nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
> +			if (nr_read < 0)
> +				rv = nr_read;
> +			if (nr_read <= 0)
> +				goto out_free_page;
> +
> +			if (copy_to_user(buf, page, nr_read)) {
> +				rv = -EFAULT;
> +				goto out_free_page;
> +			}
>  
> +			p	+= nr_read;
> +			len	-= nr_read;
> +			buf	+= nr_read;
> +			count	-= nr_read;
> +			rv	+= nr_read;
> +		}
> +	} else {
>  		/*
> -		 * Are we already starting past the official end?
> -		 * We always include the last byte that is *supposed*
> -		 * to be NUL
> +		 * Command line (1 string) occupies ARGV and
> +		 * extends into ENVP.
>  		 */
> -		offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
> -
> -		got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
> -		if (got <= offset)
> -			break;
> -		got -= offset;
> -
> -		/* Don't walk past a NUL character once you hit arg_end */
> -		if (pos + got >= arg_end) {
> -			int n = 0;
> -
> -			/*
> -			 * If we started before 'arg_end' but ended up
> -			 * at or after it, we start the NUL character
> -			 * check at arg_end-1 (where we expect the normal
> -			 * EOF to be).
> -			 *
> -			 * NOTE! This is smaller than 'got', because
> -			 * pos + got >= arg_end
> -			 */
> -			if (pos < arg_end)
> -				n = arg_end - pos - 1;
> -
> -			/* Cut off at first NUL after 'n' */
> -			got = n + strnlen(page+n, offset+got-n);
> -			if (got < offset)
> -				break;
> -			got -= offset;
> -
> -			/* Include the NUL if it existed */
> -			if (got < size)
> -				got++;
> +		struct {
> +			unsigned long p;
> +			unsigned long len;
> +		} cmdline[2] = {
> +			{ .p = arg_start, .len = len1 },
> +			{ .p = env_start, .len = len2 },
> +		};
> +		loff_t pos1 = *pos;
> +		unsigned int i;
> +
> +		i = 0;
> +		while (i < 2 && pos1 >= cmdline[i].len) {
> +			pos1 -= cmdline[i].len;
> +			i++;
>  		}
> +		while (i < 2) {
> +			p = cmdline[i].p + pos1;
> +			len = cmdline[i].len - pos1;
> +			while (count > 0 && len > 0) {
> +				unsigned int _count, l;
> +				int nr_read;
> +				bool final;
> +
> +				_count = min3(count, len, PAGE_SIZE);
> +				nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON);
> +				if (nr_read < 0)
> +					rv = nr_read;
> +				if (nr_read <= 0)
> +					goto out_free_page;
> +
> +				/*
> +				 * Command line can be shorter than whole ARGV
> +				 * even if last "marker" byte says it is not.
> +				 */
> +				final = false;
> +				l = strnlen(page, nr_read);
> +				if (l < nr_read) {
> +					nr_read = l;
> +					final = true;
> +				}
> +
> +				if (copy_to_user(buf, page, nr_read)) {
> +					rv = -EFAULT;
> +					goto out_free_page;
> +				}
> +
> +				p	+= nr_read;
> +				len	-= nr_read;
> +				buf	+= nr_read;
> +				count	-= nr_read;
> +				rv	+= nr_read;
> +
> +				if (final)
> +					goto out_free_page;
> +			}
>  
> -		got -= copy_to_user(buf, page+offset, got);
> -		if (unlikely(!got)) {
> -			if (!len)
> -				len = -EFAULT;
> -			break;
> +			/* Only first chunk can be read partially. */
> +			pos1 = 0;
> +			i++;
>  		}
> -		pos += got;
> -		buf += got;
> -		len += got;
> -		count -= got;
>  	}
>  
> +out_free_page:
>  	free_page((unsigned long)page);
> -	return len;
> +	return rv;
>  }
>  
>  static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] proc: revert /proc/*/cmdline rewrite
  2019-07-14  4:16   ` Eric W. Biederman
@ 2019-07-14  5:02     ` Linus Torvalds
  0 siblings, 0 replies; 6+ messages in thread
From: Linus Torvalds @ 2019-07-14  5:02 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Alexey Dobriyan, Andrew Morton, Alexey Izbyshev, Oleg Nesterov,
	Michal Kubecek, shasta, Linux List Kernel Mailing,
	Security Officers

[-- Attachment #1: Type: text/plain, Size: 505 bytes --]

[ Apparently this thread wasn't on the lists, and I didn't even
notice. So re-sending the patches ]

On Sat, Jul 13, 2019 at 9:16 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> Given that this fixes a regression and a bug.
>
> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>

I'd much rather start from scratch. Like the attached.

Alexey Izbyshev has a third patch that then limits the setproctitle()
case to only allow looking into the env[] area, which looks
reasonable.

           Linus

[-- Attachment #2: 0001-proc-pid-cmdline-remove-all-the-special-cases.patch --]
[-- Type: text/x-patch, Size: 3739 bytes --]

From 5563a2fb39fe0ad42f239d2f583128d50155002b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 13 Jul 2019 13:40:13 -0700
Subject: [PATCH 1/2] /proc/<pid>/cmdline: remove all the special cases

Start off with a clean slate that only reads exactly from arg_start to
arg_end, without any oddities.

We'll add back the setproctitle() special case very differently in the
next commit.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 71 ++++++--------------------------------------------
 1 file changed, 8 insertions(+), 63 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 255f6754c70d..8040f9d1cf07 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -212,7 +212,7 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
 			      size_t count, loff_t *ppos)
 {
-	unsigned long arg_start, arg_end, env_start, env_end;
+	unsigned long arg_start, arg_end;
 	unsigned long pos, len;
 	char *page;
 
@@ -223,36 +223,18 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
 	spin_lock(&mm->arg_lock);
 	arg_start = mm->arg_start;
 	arg_end = mm->arg_end;
-	env_start = mm->env_start;
-	env_end = mm->env_end;
 	spin_unlock(&mm->arg_lock);
 
 	if (arg_start >= arg_end)
 		return 0;
 
-	/*
-	 * We have traditionally allowed the user to re-write
-	 * the argument strings and overflow the end result
-	 * into the environment section. But only do that if
-	 * the environment area is contiguous to the arguments.
-	 */
-	if (env_start != arg_end || env_start >= env_end)
-		env_start = env_end = arg_end;
-
-	/* .. and limit it to a maximum of one page of slop */
-	if (env_end >= arg_end + PAGE_SIZE)
-		env_end = arg_end + PAGE_SIZE - 1;
-
 	/* We're not going to care if "*ppos" has high bits set */
-	pos = arg_start + *ppos;
-
 	/* .. but we do check the result is in the proper range */
-	if (pos < arg_start || pos >= env_end)
+	pos = arg_start + *ppos;
+	if (pos < arg_start || pos >= arg_end)
 		return 0;
-
-	/* .. and we never go past env_end */
-	if (env_end - pos < count)
-		count = env_end - pos;
+	if (count > arg_end - pos)
+		count = arg_end - pos;
 
 	page = (char *)__get_free_page(GFP_KERNEL);
 	if (!page)
@@ -262,48 +244,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
 	while (count) {
 		int got;
 		size_t size = min_t(size_t, PAGE_SIZE, count);
-		long offset;
 
-		/*
-		 * Are we already starting past the official end?
-		 * We always include the last byte that is *supposed*
-		 * to be NUL
-		 */
-		offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
-
-		got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
-		if (got <= offset)
+		got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
+		if (got <= 0)
 			break;
-		got -= offset;
-
-		/* Don't walk past a NUL character once you hit arg_end */
-		if (pos + got >= arg_end) {
-			int n = 0;
-
-			/*
-			 * If we started before 'arg_end' but ended up
-			 * at or after it, we start the NUL character
-			 * check at arg_end-1 (where we expect the normal
-			 * EOF to be).
-			 *
-			 * NOTE! This is smaller than 'got', because
-			 * pos + got >= arg_end
-			 */
-			if (pos < arg_end)
-				n = arg_end - pos - 1;
-
-			/* Cut off at first NUL after 'n' */
-			got = n + strnlen(page+n, offset+got-n);
-			if (got < offset)
-				break;
-			got -= offset;
-
-			/* Include the NUL if it existed */
-			if (got < size)
-				got++;
-		}
-
-		got -= copy_to_user(buf, page+offset, got);
+		got -= copy_to_user(buf, page, got);
 		if (unlikely(!got)) {
 			if (!len)
 				len = -EFAULT;
-- 
2.22.0.193.g083935f9a2


[-- Attachment #3: 0002-proc-pid-cmdline-add-back-the-setproctitle-special-c.patch --]
[-- Type: text/x-patch, Size: 2855 bytes --]

From 63dd14999c6b210fbe33f780fec53faefa867d95 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 13 Jul 2019 14:27:14 -0700
Subject: [PATCH 2/2] /proc/<pid>/cmdline: add back the setproctitle() special
 case

This makes the setproctitle() special case very explicit indeed, and
handles it with a separate helper function entirely.

This makes the logic about when we use the string lengths etc much more
obvious, and makes it easy to see what we do.

[ Fixed for missing 'count' check noted by Alexey Izbyshev ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8040f9d1cf07..3ad3ff4cc12c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -209,12 +209,54 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 	return result;
 }
 
+/*
+ * If the user used setproctitle(), we just get the string from
+ * user space at arg_start, and limit it to a maximum of one page.
+ */
+static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
+				size_t count, loff_t *ppos,
+				unsigned long arg_start)
+{
+	unsigned long pos = *ppos;
+	char *page;
+	int ret, got;
+
+	if (pos >= PAGE_SIZE)
+		return 0;
+
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	ret = 0;
+	got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
+	if (got > 0) {
+		int len = strnlen(page, got);
+
+		/* Include the NUL character if it was found */
+		if (len < got)
+			len++;
+
+		if (len > pos) {
+			len -= pos;
+			if (len > count)
+				len = count;
+			len -= copy_to_user(buf, page+pos, len);
+			if (!len)
+				len = -EFAULT;
+			ret = len;
+		}
+	}
+	free_page((unsigned long)page);
+	return ret;
+}
+
 static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
 			      size_t count, loff_t *ppos)
 {
 	unsigned long arg_start, arg_end;
 	unsigned long pos, len;
-	char *page;
+	char *page, c;
 
 	/* Check if process spawned far enough to have cmdline. */
 	if (!mm->env_end)
@@ -228,6 +270,16 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
 	if (arg_start >= arg_end)
 		return 0;
 
+	/*
+	 * Magical special case: if the argv[] end byte is not
+	 * zero, the user has overwritten it with setproctitle(3).
+	 *
+	 * Possible future enhancement: do this only once when
+	 * pos is 0, and set a flag in the 'struct file'.
+	 */
+	if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
+		return get_mm_proctitle(mm, buf, count, ppos, arg_start);
+
 	/* We're not going to care if "*ppos" has high bits set */
 	/* .. but we do check the result is in the proper range */
 	pos = arg_start + *ppos;
-- 
2.22.0.193.g083935f9a2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] proc: revert /proc/*/cmdline rewrite
       [not found] ` <CAHk-=wj_mrNnM-q_z95GcNB=Ab4LaUC6Bi6Q-+3Q9u9NC=3iDA@mail.gmail.com>
@ 2019-07-14  9:51   ` Alexey Dobriyan
  2019-07-14 18:12     ` Linus Torvalds
  0 siblings, 1 reply; 6+ messages in thread
From: Alexey Dobriyan @ 2019-07-14  9:51 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, izbyshev, Oleg Nesterov, Michal Kubecek, shasta,
	linux-kernel, security

	[re-add lists]

On Sat, Jul 13, 2019 at 10:50:20AM -0700, Linus Torvalds wrote:
> On Sat, Jul 13, 2019 at 12:29 AM Alexey Dobriyan <adobriyan@gmail.com> wrote:
> >
> > /proc/*/cmdline continues to cause problems:
> 
> If we're reverting this, then we should revert all the way back to the
> original fixed-length one that had the original semantics and was
> simple.

No, because all those Java applications have command lines measured in
dozens of kilobytes.

> What was the problem with the one-line fix instead?

The problem is that I can't even drag this trivia in out of _fear_ that
it is userspace observable:

	https://marc.info/?t=155863429700002&r=1&w=4
	[PATCH] elf: fix "start_code" evaluation

and yet the patch which did a regression and an infoleak continues
to be papered over and for which the only justification was
"simplify and clarify".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] proc: revert /proc/*/cmdline rewrite
  2019-07-14  9:51   ` Alexey Dobriyan
@ 2019-07-14 18:12     ` Linus Torvalds
  0 siblings, 0 replies; 6+ messages in thread
From: Linus Torvalds @ 2019-07-14 18:12 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: Andrew Morton, Alexey Izbyshev, Oleg Nesterov, Michal Kubecek,
	shasta, Linux List Kernel Mailing, Security Officers

On Sun, Jul 14, 2019 at 2:52 AM Alexey Dobriyan <adobriyan@gmail.com> wrote:
>
> The problem is that I can't even drag this trivia in out of _fear_ that
> it is userspace observable:
>
>         https://marc.info/?t=155863429700002&r=1&w=4
>         [PATCH] elf: fix "start_code" evaluation

Oh, we should do things like this all the time.

"Observable" isn't a problem per se. It only turns into a problem when
it actually breaks things.

We should always strive for understandable - and maintainable - code.

"Make it as simple as possible, but no simpler".

Of course, if you can prove that some change isn't observable, then
that is always the safer change.

Because any observable change _can_ (and admittedly surprisingly
often) does end up showing that yes, somebody out there depended on
some particularly subtle observable detail.

But often "observable" doesn't mean "breakage", and it's not that
uncommon that we do things that have slightly different semantics in
order to clean stuff up (or fix actual bugs that cause problems).

The important thing when something observable _does_ cause actual
breakage is that it gets fixed, and that people don't try to make
excuses for it. In particular the "but I fixed a bug" is _not_ an
excuse for causing some user load to break, because that was just
another bug.

Of course, it's also perfectly valid to say "ok, this could be
improved, but changing it changes observable behavior, and it's not
worth my time to worry about whether something can break".

So there should certainly be a *worry* about breakage (and the pain
that breakage can cause) when making cleanups etc. But as long as you
stand behind your cleanup and know that you may have to fix it up if
somebody reports an issue with it, it's all good.

IOW, it's a balancing thing. Do you think the cleanup is worth the
"this may come back to bite me" problem?

> and yet the patch which did a regression and an infoleak continues
> to be papered over and for which the only justification was
> "simplify and clarify".

See above. "Simplify and clarify" is a good excuse in general.

What is *not* a good excuse is then if somebody doesn't stand up and
say "oh, my bad, I screwed up, and here's the fix for the breakage".

In this case it took a year for people to report problems, which shows
that at least the breakage wasn't obvious.

And I'd rather fix it by cleaning up *more* and making the rules
simpler and easier to understand.

Don't get me wrong - reverting is often a good strategy too.

I will revert very aggressively when close to a release, for example,
when we just don't have time to try to figure things out. Or if the
breakage is large enough that it hinders people from testing and
working on other things.

Or if the original developer is not responsive and there isn't
somebody around that goes "ok, that can be fixed by xyz.."

Then "let's just revert" is the right thing to do. It can be the
simplest thing, when you just don't have the resources to do anything
else, or it's not just worth it.

                     Linus

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-07-14 18:13 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20190713072855.GB23167@avx2>
2019-07-13  7:32 ` [PATCH] proc: revert /proc/*/cmdline rewrite Alexey Dobriyan
2019-07-13 14:00   ` Alexey Izbyshev
2019-07-14  4:16   ` Eric W. Biederman
2019-07-14  5:02     ` Linus Torvalds
     [not found] ` <CAHk-=wj_mrNnM-q_z95GcNB=Ab4LaUC6Bi6Q-+3Q9u9NC=3iDA@mail.gmail.com>
2019-07-14  9:51   ` Alexey Dobriyan
2019-07-14 18:12     ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).