All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] fs: clear file privilege bits when mmap writing
@ 2015-12-04  5:24 ` Kees Cook
  0 siblings, 0 replies; 9+ messages in thread
From: Kees Cook @ 2015-12-04  5:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jan Kara, Willy Tarreau, Eric W. Biederman, Kirill A. Shutemov,
	Oleg Nesterov, Rik van Riel, Chen Gang, Davidlohr Bueso,
	Andrea Arcangeli, linux-mm, linux-kernel

Normally, when a user can modify a file that has setuid or setgid bits,
those bits are cleared when they are not the file owner or a member
of the group. This is enforced when using write and truncate but not
when writing to a shared mmap on the file. This could allow the file
writer to gain privileges by changing a binary without losing the
setuid/setgid/caps bits.

Changing the bits requires holding inode->i_mutex, so it cannot be done
during the page fault (due to mmap_sem being held during the fault).
Instead, clear the bits if PROT_WRITE is being used at mmap open time.
But we can't do the check in the right place inside mmap, so we have to
do it before holding mmap_sem, which means duplicating some checks, which
have to be available to the non-MMU builds too.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
v4:
 - fixed email to actually deliver again, sorry for any dups!
v3:
 - move outside of mmap_sem for real now, fengguang
 - check return code of file_remove_privs, akpm
v2:
 - move check from page fault to mmap open, jack
---
 include/linux/mm.h |  1 +
 mm/mmap.c          | 19 ++++---------------
 mm/util.c          | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00bad7793788..b264c8be7114 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1912,6 +1912,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
+extern int do_mmap_shared_checks(struct file *file, unsigned long prot);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
diff --git a/mm/mmap.c b/mm/mmap.c
index 2ce04a649f6b..bcbe592a2c49 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1321,24 +1321,13 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 
 	if (file) {
 		struct inode *inode = file_inode(file);
+		int err;
 
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
-			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
-				return -EACCES;
-
-			/*
-			 * Make sure we don't allow writing to an append-only
-			 * file..
-			 */
-			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
-				return -EACCES;
-
-			/*
-			 * Make sure there are no mandatory locks on the file.
-			 */
-			if (locks_verify_locked(file))
-				return -EAGAIN;
+			err = do_mmap_shared_checks(file, prot);
+			if (err)
+				return err;
 
 			vm_flags |= VM_SHARED | VM_MAYSHARE;
 			if (!(file->f_mode & FMODE_WRITE))
diff --git a/mm/util.c b/mm/util.c
index 9af1c12b310c..1882eaf33a37 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -283,6 +283,29 @@ int __weak get_user_pages_fast(unsigned long start,
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
+int do_mmap_shared_checks(struct file *file, unsigned long prot)
+{
+	struct inode *inode = file_inode(file);
+
+	if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
+		return -EACCES;
+
+	/*
+	 * Make sure we don't allow writing to an append-only
+	 * file..
+	 */
+	if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
+		return -EACCES;
+
+	/*
+	 * Make sure there are no mandatory locks on the file.
+	 */
+	if (locks_verify_locked(file))
+		return -EAGAIN;
+
+	return 0;
+}
+
 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long pgoff)
@@ -291,6 +314,33 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	unsigned long populate;
 
+	/*
+	 * If we must remove privs, we do it here since doing it during
+	 * page fault may be expensive and cannot hold inode->i_mutex,
+	 * since mm->mmap_sem is already held.
+	 */
+	if (file && (flag & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
+		struct inode *inode = file_inode(file);
+		int err;
+
+		if (!IS_NOSEC(inode)) {
+			/*
+			 * Make sure we can't strip privs from a file that
+			 * wouldn't otherwise be allowed to be mmapped.
+			 */
+			err = do_mmap_shared_checks(file, prot);
+			if (err)
+				return err;
+
+			mutex_lock(&inode->i_mutex);
+			err = file_remove_privs(file);
+			mutex_unlock(&inode->i_mutex);
+
+			if (err)
+				return err;
+		}
+	}
+
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret) {
 		down_write(&mm->mmap_sem);
-- 
1.9.1


-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v4] fs: clear file privilege bits when mmap writing
@ 2015-12-04  5:24 ` Kees Cook
  0 siblings, 0 replies; 9+ messages in thread
From: Kees Cook @ 2015-12-04  5:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jan Kara, Willy Tarreau, Eric W. Biederman, Kirill A. Shutemov,
	Oleg Nesterov, Rik van Riel, Chen Gang, Davidlohr Bueso,
	Andrea Arcangeli, linux-mm, linux-kernel

Normally, when a user can modify a file that has setuid or setgid bits,
those bits are cleared when they are not the file owner or a member
of the group. This is enforced when using write and truncate but not
when writing to a shared mmap on the file. This could allow the file
writer to gain privileges by changing a binary without losing the
setuid/setgid/caps bits.

Changing the bits requires holding inode->i_mutex, so it cannot be done
during the page fault (due to mmap_sem being held during the fault).
Instead, clear the bits if PROT_WRITE is being used at mmap open time.
But we can't do the check in the right place inside mmap, so we have to
do it before holding mmap_sem, which means duplicating some checks, which
have to be available to the non-MMU builds too.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
v4:
 - fixed email to actually deliver again, sorry for any dups!
v3:
 - move outside of mmap_sem for real now, fengguang
 - check return code of file_remove_privs, akpm
v2:
 - move check from page fault to mmap open, jack
---
 include/linux/mm.h |  1 +
 mm/mmap.c          | 19 ++++---------------
 mm/util.c          | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00bad7793788..b264c8be7114 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1912,6 +1912,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
+extern int do_mmap_shared_checks(struct file *file, unsigned long prot);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
diff --git a/mm/mmap.c b/mm/mmap.c
index 2ce04a649f6b..bcbe592a2c49 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1321,24 +1321,13 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 
 	if (file) {
 		struct inode *inode = file_inode(file);
+		int err;
 
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
-			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
-				return -EACCES;
-
-			/*
-			 * Make sure we don't allow writing to an append-only
-			 * file..
-			 */
-			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
-				return -EACCES;
-
-			/*
-			 * Make sure there are no mandatory locks on the file.
-			 */
-			if (locks_verify_locked(file))
-				return -EAGAIN;
+			err = do_mmap_shared_checks(file, prot);
+			if (err)
+				return err;
 
 			vm_flags |= VM_SHARED | VM_MAYSHARE;
 			if (!(file->f_mode & FMODE_WRITE))
diff --git a/mm/util.c b/mm/util.c
index 9af1c12b310c..1882eaf33a37 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -283,6 +283,29 @@ int __weak get_user_pages_fast(unsigned long start,
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
+int do_mmap_shared_checks(struct file *file, unsigned long prot)
+{
+	struct inode *inode = file_inode(file);
+
+	if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
+		return -EACCES;
+
+	/*
+	 * Make sure we don't allow writing to an append-only
+	 * file..
+	 */
+	if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
+		return -EACCES;
+
+	/*
+	 * Make sure there are no mandatory locks on the file.
+	 */
+	if (locks_verify_locked(file))
+		return -EAGAIN;
+
+	return 0;
+}
+
 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long pgoff)
@@ -291,6 +314,33 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	unsigned long populate;
 
+	/*
+	 * If we must remove privs, we do it here since doing it during
+	 * page fault may be expensive and cannot hold inode->i_mutex,
+	 * since mm->mmap_sem is already held.
+	 */
+	if (file && (flag & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
+		struct inode *inode = file_inode(file);
+		int err;
+
+		if (!IS_NOSEC(inode)) {
+			/*
+			 * Make sure we can't strip privs from a file that
+			 * wouldn't otherwise be allowed to be mmapped.
+			 */
+			err = do_mmap_shared_checks(file, prot);
+			if (err)
+				return err;
+
+			mutex_lock(&inode->i_mutex);
+			err = file_remove_privs(file);
+			mutex_unlock(&inode->i_mutex);
+
+			if (err)
+				return err;
+		}
+	}
+
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret) {
 		down_write(&mm->mmap_sem);
-- 
1.9.1


-- 
Kees Cook
Chrome OS & Brillo Security

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v4] fs: clear file privilege bits when mmap writing
  2015-12-09 12:49   ` Jan Kara
@ 2015-12-09 17:53     ` Kees Cook
  -1 siblings, 0 replies; 9+ messages in thread
From: Kees Cook @ 2015-12-09 17:53 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, yalin wang, Willy Tarreau, Eric W. Biederman,
	Kirill A. Shutemov, Oleg Nesterov, Rik van Riel, Chen Gang,
	Davidlohr Bueso, Andrea Arcangeli, Alexander Viro, linux-fsdevel,
	Linux-MM, LKML

On Wed, Dec 9, 2015 at 4:49 AM, Jan Kara <jack@suse.cz> wrote:
> On Tue 08-12-15 15:28:18, Kees Cook wrote:
>> Normally, when a user can modify a file that has setuid or setgid bits,
>> those bits are cleared when they are not the file owner or a member
>> of the group. This is enforced when using write and truncate but not
>> when writing to a shared mmap on the file. This could allow the file
>> writer to gain privileges by changing a binary without losing the
>> setuid/setgid/caps bits.
>>
>> Changing the bits requires holding inode->i_mutex, so it cannot be done
>> during the page fault (due to mmap_sem being held during the fault). We
>> could do this during vm_mmap_pgoff, but that would need coverage in
>> mprotect as well, but to check for MAP_SHARED, we'd need to hold mmap_sem
>> again.
>>
>> Instead, detect the need to clear the bits during the page fault, and
>> actually remove the bits during final fput. Since the file was open for
>> writing, it wouldn't have been possible to execute it yet.
>>
>> Signed-off-by: Kees Cook <keescook@chromium.org>
>> ---
>> Here's another way? I wonder which of these will actually work. I
>> wish we could reject writes if file_remove_privs() fails.
>
> Yeah, the fact that we cannot do anything with file_remove_privs() failure
> is rather unfortunate. So open for writing may be the best choice for
> file_remove_privs() in the end? It's not perfect but it looks like the
> least problematic solution.

Yeah, back to just the open itself. I can't even delay this to the mmap. :(

I will do a v5. :)

-Kees

>
> Frankly writeable files that have SUID / SGID bits set are IMHO problems on
> its own, with IMA attrs which are handled by file_remove_privs() as well
> things may be somewhat different.
>
>> diff --git a/fs/file_table.c b/fs/file_table.c
>> index ad17e05ebf95..abb537ef4344 100644
>> --- a/fs/file_table.c
>> +++ b/fs/file_table.c
>> @@ -191,6 +191,14 @@ static void __fput(struct file *file)
>>
>>       might_sleep();
>>
>> +     /*
>> +      * XXX: While avoiding mmap_sem, we've already been written to.
>> +      * We must ignore the return value, since we can't reject the
>> +      * write.
>> +      */
>> +     if (unlikely(file->f_remove_privs))
>> +             file_remove_privs(file);
>> +
>
> You're missing i_mutex locking again ;).
>
>                                                                 Honza
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR



-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4] fs: clear file privilege bits when mmap writing
@ 2015-12-09 17:53     ` Kees Cook
  0 siblings, 0 replies; 9+ messages in thread
From: Kees Cook @ 2015-12-09 17:53 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, yalin wang, Willy Tarreau, Eric W. Biederman,
	Kirill A. Shutemov, Oleg Nesterov, Rik van Riel, Chen Gang,
	Davidlohr Bueso, Andrea Arcangeli, Alexander Viro, linux-fsdevel,
	Linux-MM, LKML

On Wed, Dec 9, 2015 at 4:49 AM, Jan Kara <jack@suse.cz> wrote:
> On Tue 08-12-15 15:28:18, Kees Cook wrote:
>> Normally, when a user can modify a file that has setuid or setgid bits,
>> those bits are cleared when they are not the file owner or a member
>> of the group. This is enforced when using write and truncate but not
>> when writing to a shared mmap on the file. This could allow the file
>> writer to gain privileges by changing a binary without losing the
>> setuid/setgid/caps bits.
>>
>> Changing the bits requires holding inode->i_mutex, so it cannot be done
>> during the page fault (due to mmap_sem being held during the fault). We
>> could do this during vm_mmap_pgoff, but that would need coverage in
>> mprotect as well, but to check for MAP_SHARED, we'd need to hold mmap_sem
>> again.
>>
>> Instead, detect the need to clear the bits during the page fault, and
>> actually remove the bits during final fput. Since the file was open for
>> writing, it wouldn't have been possible to execute it yet.
>>
>> Signed-off-by: Kees Cook <keescook@chromium.org>
>> ---
>> Here's another way? I wonder which of these will actually work. I
>> wish we could reject writes if file_remove_privs() fails.
>
> Yeah, the fact that we cannot do anything with file_remove_privs() failure
> is rather unfortunate. So open for writing may be the best choice for
> file_remove_privs() in the end? It's not perfect but it looks like the
> least problematic solution.

Yeah, back to just the open itself. I can't even delay this to the mmap. :(

I will do a v5. :)

-Kees

>
> Frankly writeable files that have SUID / SGID bits set are IMHO problems on
> its own, with IMA attrs which are handled by file_remove_privs() as well
> things may be somewhat different.
>
>> diff --git a/fs/file_table.c b/fs/file_table.c
>> index ad17e05ebf95..abb537ef4344 100644
>> --- a/fs/file_table.c
>> +++ b/fs/file_table.c
>> @@ -191,6 +191,14 @@ static void __fput(struct file *file)
>>
>>       might_sleep();
>>
>> +     /*
>> +      * XXX: While avoiding mmap_sem, we've already been written to.
>> +      * We must ignore the return value, since we can't reject the
>> +      * write.
>> +      */
>> +     if (unlikely(file->f_remove_privs))
>> +             file_remove_privs(file);
>> +
>
> You're missing i_mutex locking again ;).
>
>                                                                 Honza
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR



-- 
Kees Cook
Chrome OS & Brillo Security

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4] fs: clear file privilege bits when mmap writing
  2015-12-08 23:28 ` Kees Cook
@ 2015-12-09 12:49   ` Jan Kara
  -1 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2015-12-09 12:49 UTC (permalink / raw)
  To: Kees Cook
  Cc: Andrew Morton, Jan Kara, yalin wang, Willy Tarreau,
	Eric W. Biederman, Kirill A. Shutemov, Oleg Nesterov,
	Rik van Riel, Chen Gang, Davidlohr Bueso, Andrea Arcangeli,
	Alexander Viro, linux-fsdevel, linux-mm, linux-kernel

On Tue 08-12-15 15:28:18, Kees Cook wrote:
> Normally, when a user can modify a file that has setuid or setgid bits,
> those bits are cleared when they are not the file owner or a member
> of the group. This is enforced when using write and truncate but not
> when writing to a shared mmap on the file. This could allow the file
> writer to gain privileges by changing a binary without losing the
> setuid/setgid/caps bits.
> 
> Changing the bits requires holding inode->i_mutex, so it cannot be done
> during the page fault (due to mmap_sem being held during the fault). We
> could do this during vm_mmap_pgoff, but that would need coverage in
> mprotect as well, but to check for MAP_SHARED, we'd need to hold mmap_sem
> again.
> 
> Instead, detect the need to clear the bits during the page fault, and
> actually remove the bits during final fput. Since the file was open for
> writing, it wouldn't have been possible to execute it yet.
> 
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
> Here's another way? I wonder which of these will actually work. I
> wish we could reject writes if file_remove_privs() fails.

Yeah, the fact that we cannot do anything with file_remove_privs() failure
is rather unfortunate. So open for writing may be the best choice for
file_remove_privs() in the end? It's not perfect but it looks like the
least problematic solution.

Frankly writeable files that have SUID / SGID bits set are IMHO problems on
its own, with IMA attrs which are handled by file_remove_privs() as well
things may be somewhat different.

> diff --git a/fs/file_table.c b/fs/file_table.c
> index ad17e05ebf95..abb537ef4344 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -191,6 +191,14 @@ static void __fput(struct file *file)
>  
>  	might_sleep();
>  
> +	/*
> +	 * XXX: While avoiding mmap_sem, we've already been written to.
> +	 * We must ignore the return value, since we can't reject the
> +	 * write.
> +	 */
> +	if (unlikely(file->f_remove_privs))
> +		file_remove_privs(file);
> +

You're missing i_mutex locking again ;).

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4] fs: clear file privilege bits when mmap writing
@ 2015-12-09 12:49   ` Jan Kara
  0 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2015-12-09 12:49 UTC (permalink / raw)
  To: Kees Cook
  Cc: Andrew Morton, Jan Kara, yalin wang, Willy Tarreau,
	Eric W. Biederman, Kirill A. Shutemov, Oleg Nesterov,
	Rik van Riel, Chen Gang, Davidlohr Bueso, Andrea Arcangeli,
	Alexander Viro, linux-fsdevel, linux-mm, linux-kernel

On Tue 08-12-15 15:28:18, Kees Cook wrote:
> Normally, when a user can modify a file that has setuid or setgid bits,
> those bits are cleared when they are not the file owner or a member
> of the group. This is enforced when using write and truncate but not
> when writing to a shared mmap on the file. This could allow the file
> writer to gain privileges by changing a binary without losing the
> setuid/setgid/caps bits.
> 
> Changing the bits requires holding inode->i_mutex, so it cannot be done
> during the page fault (due to mmap_sem being held during the fault). We
> could do this during vm_mmap_pgoff, but that would need coverage in
> mprotect as well, but to check for MAP_SHARED, we'd need to hold mmap_sem
> again.
> 
> Instead, detect the need to clear the bits during the page fault, and
> actually remove the bits during final fput. Since the file was open for
> writing, it wouldn't have been possible to execute it yet.
> 
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
> Here's another way? I wonder which of these will actually work. I
> wish we could reject writes if file_remove_privs() fails.

Yeah, the fact that we cannot do anything with file_remove_privs() failure
is rather unfortunate. So open for writing may be the best choice for
file_remove_privs() in the end? It's not perfect but it looks like the
least problematic solution.

Frankly writeable files that have SUID / SGID bits set are IMHO problems on
its own, with IMA attrs which are handled by file_remove_privs() as well
things may be somewhat different.

> diff --git a/fs/file_table.c b/fs/file_table.c
> index ad17e05ebf95..abb537ef4344 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -191,6 +191,14 @@ static void __fput(struct file *file)
>  
>  	might_sleep();
>  
> +	/*
> +	 * XXX: While avoiding mmap_sem, we've already been written to.
> +	 * We must ignore the return value, since we can't reject the
> +	 * write.
> +	 */
> +	if (unlikely(file->f_remove_privs))
> +		file_remove_privs(file);
> +

You're missing i_mutex locking again ;).

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4] fs: clear file privilege bits when mmap writing
  2015-12-08 23:28 ` Kees Cook
  (?)
@ 2015-12-09 12:04 ` Tetsuo Handa
  -1 siblings, 0 replies; 9+ messages in thread
From: Tetsuo Handa @ 2015-12-09 12:04 UTC (permalink / raw)
  To: Kees Cook, Andrew Morton
  Cc: Jan Kara, yalin wang, Willy Tarreau, Eric W. Biederman,
	Kirill A. Shutemov, Oleg Nesterov, Rik van Riel, Chen Gang,
	Davidlohr Bueso, Andrea Arcangeli, Alexander Viro, linux-fsdevel

On 2015/12/09 8:28, Kees Cook wrote:
> Changing the bits requires holding inode->i_mutex, so it cannot be done
> during the page fault (due to mmap_sem being held during the fault). We
> could do this during vm_mmap_pgoff, but that would need coverage in
> mprotect as well, but to check for MAP_SHARED, we'd need to hold mmap_sem
> again.
> 
> Instead, detect the need to clear the bits during the page fault, and
> actually remove the bits during final fput. Since the file was open for
> writing, it wouldn't have been possible to execute it yet.

Did you check that inode->i_mutex is held when final fput() is called?

Did you check a case where the file is copied between mmap() and final
fput() (i.e. open() for write, mmap() for write, sleep forever waiting
for the file owner to copy the content and attributes of the modified
file)?


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v4] fs: clear file privilege bits when mmap writing
@ 2015-12-08 23:28 ` Kees Cook
  0 siblings, 0 replies; 9+ messages in thread
From: Kees Cook @ 2015-12-08 23:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jan Kara, yalin wang, Willy Tarreau, Eric W. Biederman,
	Kirill A. Shutemov, Oleg Nesterov, Rik van Riel, Chen Gang,
	Davidlohr Bueso, Andrea Arcangeli, Alexander Viro, linux-fsdevel,
	linux-mm, linux-kernel

Normally, when a user can modify a file that has setuid or setgid bits,
those bits are cleared when they are not the file owner or a member
of the group. This is enforced when using write and truncate but not
when writing to a shared mmap on the file. This could allow the file
writer to gain privileges by changing a binary without losing the
setuid/setgid/caps bits.

Changing the bits requires holding inode->i_mutex, so it cannot be done
during the page fault (due to mmap_sem being held during the fault). We
could do this during vm_mmap_pgoff, but that would need coverage in
mprotect as well, but to check for MAP_SHARED, we'd need to hold mmap_sem
again.

Instead, detect the need to clear the bits during the page fault, and
actually remove the bits during final fput. Since the file was open for
writing, it wouldn't have been possible to execute it yet.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
Here's another way? I wonder which of these will actually work. I
wish we could reject writes if file_remove_privs() fails.

v4:
- delay removal instead of still needing mmap_sem for mprotect, yalin
v3:
- move outside of mmap_sem for real now, fengguang
- check return code of file_remove_privs, akpm
v2:
- move to mmap from fault handler, jack
---
 fs/file_table.c    | 8 ++++++++
 include/linux/fs.h | 1 +
 mm/memory.c        | 1 +
 3 files changed, 10 insertions(+)

diff --git a/fs/file_table.c b/fs/file_table.c
index ad17e05ebf95..abb537ef4344 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -191,6 +191,14 @@ static void __fput(struct file *file)
 
 	might_sleep();
 
+	/*
+	 * XXX: While avoiding mmap_sem, we've already been written to.
+	 * We must ignore the return value, since we can't reject the
+	 * write.
+	 */
+	if (unlikely(file->f_remove_privs))
+		file_remove_privs(file);
+
 	fsnotify_close(file);
 	/*
 	 * The function eventpoll_release() should be the first called
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3aa514254161..409bd7047e7e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -872,6 +872,7 @@ struct file {
 	struct list_head	f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
+	bool			f_remove_privs;
 } __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
 
 struct file_handle {
diff --git a/mm/memory.c b/mm/memory.c
index c387430f06c3..08a77e0cf65f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2036,6 +2036,7 @@ static inline int wp_page_reuse(struct mm_struct *mm,
 
 		if (!page_mkwrite)
 			file_update_time(vma->vm_file);
+		vma->vm_file->f_remove_privs = true;
 	}
 
 	return VM_FAULT_WRITE;
-- 
1.9.1


-- 
Kees Cook
Chrome OS & Brillo Security

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v4] fs: clear file privilege bits when mmap writing
@ 2015-12-08 23:28 ` Kees Cook
  0 siblings, 0 replies; 9+ messages in thread
From: Kees Cook @ 2015-12-08 23:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jan Kara, yalin wang, Willy Tarreau, Eric W. Biederman,
	Kirill A. Shutemov, Oleg Nesterov, Rik van Riel, Chen Gang,
	Davidlohr Bueso, Andrea Arcangeli, Alexander Viro, linux-fsdevel,
	linux-mm, linux-kernel

Normally, when a user can modify a file that has setuid or setgid bits,
those bits are cleared when they are not the file owner or a member
of the group. This is enforced when using write and truncate but not
when writing to a shared mmap on the file. This could allow the file
writer to gain privileges by changing a binary without losing the
setuid/setgid/caps bits.

Changing the bits requires holding inode->i_mutex, so it cannot be done
during the page fault (due to mmap_sem being held during the fault). We
could do this during vm_mmap_pgoff, but that would need coverage in
mprotect as well, but to check for MAP_SHARED, we'd need to hold mmap_sem
again.

Instead, detect the need to clear the bits during the page fault, and
actually remove the bits during final fput. Since the file was open for
writing, it wouldn't have been possible to execute it yet.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
Here's another way? I wonder which of these will actually work. I
wish we could reject writes if file_remove_privs() fails.

v4:
- delay removal instead of still needing mmap_sem for mprotect, yalin
v3:
- move outside of mmap_sem for real now, fengguang
- check return code of file_remove_privs, akpm
v2:
- move to mmap from fault handler, jack
---
 fs/file_table.c    | 8 ++++++++
 include/linux/fs.h | 1 +
 mm/memory.c        | 1 +
 3 files changed, 10 insertions(+)

diff --git a/fs/file_table.c b/fs/file_table.c
index ad17e05ebf95..abb537ef4344 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -191,6 +191,14 @@ static void __fput(struct file *file)
 
 	might_sleep();
 
+	/*
+	 * XXX: While avoiding mmap_sem, we've already been written to.
+	 * We must ignore the return value, since we can't reject the
+	 * write.
+	 */
+	if (unlikely(file->f_remove_privs))
+		file_remove_privs(file);
+
 	fsnotify_close(file);
 	/*
 	 * The function eventpoll_release() should be the first called
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3aa514254161..409bd7047e7e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -872,6 +872,7 @@ struct file {
 	struct list_head	f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
+	bool			f_remove_privs;
 } __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
 
 struct file_handle {
diff --git a/mm/memory.c b/mm/memory.c
index c387430f06c3..08a77e0cf65f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2036,6 +2036,7 @@ static inline int wp_page_reuse(struct mm_struct *mm,
 
 		if (!page_mkwrite)
 			file_update_time(vma->vm_file);
+		vma->vm_file->f_remove_privs = true;
 	}
 
 	return VM_FAULT_WRITE;
-- 
1.9.1


-- 
Kees Cook
Chrome OS & Brillo Security

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-12-09 17:53 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-04  5:24 [PATCH v4] fs: clear file privilege bits when mmap writing Kees Cook
2015-12-04  5:24 ` Kees Cook
2015-12-08 23:28 Kees Cook
2015-12-08 23:28 ` Kees Cook
2015-12-09 12:04 ` Tetsuo Handa
2015-12-09 12:49 ` Jan Kara
2015-12-09 12:49   ` Jan Kara
2015-12-09 17:53   ` Kees Cook
2015-12-09 17:53     ` Kees Cook

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.