All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aleksa Sarai <cyphar@cyphar.com>
To: Josh Triplett <josh@joshtriplett.org>
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	io-uring@vger.kernel.org, linux-arch@vger.kernel.org,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Arnd Bergmann <arnd@arndb.de>, Jens Axboe <axboe@kernel.dk>
Subject: Re: [PATCH v3 1/3] fs: Support setting a minimum fd for "lowest available fd" allocation
Date: Wed, 8 Apr 2020 22:00:40 +1000	[thread overview]
Message-ID: <20200408120040.mtkqmymfazrv3lqk@yavin.dot.cyphar.com> (raw)
In-Reply-To: <90bf6fd43343ca862e7f61b0834baf2bdbd0e24c.1586321767.git.josh@joshtriplett.org>

[-- Attachment #1: Type: text/plain, Size: 6506 bytes --]

On 2020-04-07, Josh Triplett <josh@joshtriplett.org> wrote:
> Some applications want to prevent the usual "lowest available fd"
> allocation from allocating certain file descriptors. For instance, they
> may want to prevent allocation of a closed fd 0, 1, or 2 other than via
> dup2/dup3, or reserve some low file descriptors for other purposes.
> 
> Add a prctl to increase the minimum fd and return the previous minimum.
> 
> System calls that allocate a specific file descriptor, such as
> dup2/dup3, ignore this minimum.
> 
> exec resets the minimum fd, to prevent one program from interfering with
> another program's expectations about fd allocation.

Why is it implemented as an "increase the value" interface? It feels
like this is meant to avoid some kind of security trap (with a library
reducing the value) but it means that if you want to temporarily raise
the minimum fd number it's not possible (without re-exec()ing yourself,
which is hardly a fun thing to do).

Then again, this might've been discussed before and I missed it...

> Test program:
> 
>     #include <err.h>
>     #include <fcntl.h>
>     #include <stdio.h>
>     #include <sys/prctl.h>
> 
>     int main(int argc, char *argv[])
>     {
>         if (prctl(PR_INCREASE_MIN_FD, 100, 0, 0, 0) < 0)
>             err(1, "prctl");
>         int fd = open("/dev/null", O_RDONLY);
>         if (fd < 0)
>             err(1, "open");
>         printf("%d\n", fd); // prints 100
>         return 0;
>     }
> 
> Signed-off-by: Josh Triplett <josh@joshtriplett.org>
> ---
>  fs/file.c                  | 23 +++++++++++++++++------
>  include/linux/fdtable.h    |  1 +
>  include/linux/file.h       |  1 +
>  include/uapi/linux/prctl.h |  3 +++
>  kernel/sys.c               |  5 +++++
>  5 files changed, 27 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/file.c b/fs/file.c
> index c8a4e4c86e55..ba06140d89af 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -286,7 +286,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
>  	spin_lock_init(&newf->file_lock);
>  	newf->resize_in_progress = false;
>  	init_waitqueue_head(&newf->resize_wait);
> -	newf->next_fd = 0;
>  	new_fdt = &newf->fdtab;
>  	new_fdt->max_fds = NR_OPEN_DEFAULT;
>  	new_fdt->close_on_exec = newf->close_on_exec_init;
> @@ -295,6 +294,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
>  	new_fdt->fd = &newf->fd_array[0];
>  
>  	spin_lock(&oldf->file_lock);
> +	newf->next_fd = newf->min_fd = oldf->min_fd;
>  	old_fdt = files_fdtable(oldf);
>  	open_files = count_open_files(old_fdt);
>  
> @@ -487,9 +487,7 @@ int __alloc_fd(struct files_struct *files,
>  	spin_lock(&files->file_lock);
>  repeat:
>  	fdt = files_fdtable(files);
> -	fd = start;
> -	if (fd < files->next_fd)
> -		fd = files->next_fd;
> +	fd = max3(start, files->min_fd, files->next_fd);
>  
>  	if (fd < fdt->max_fds)
>  		fd = find_next_fd(fdt, fd);
> @@ -514,7 +512,7 @@ int __alloc_fd(struct files_struct *files,
>  		goto repeat;
>  
>  	if (start <= files->next_fd)
> -		files->next_fd = fd + 1;
> +		files->next_fd = max(fd + 1, files->min_fd);
>  
>  	__set_open_fd(fd, fdt);
>  	if (flags & O_CLOEXEC)
> @@ -555,7 +553,7 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd)
>  {
>  	struct fdtable *fdt = files_fdtable(files);
>  	__clear_open_fd(fd, fdt);
> -	if (fd < files->next_fd)
> +	if (fd < files->next_fd && fd >= files->min_fd)
>  		files->next_fd = fd;
>  }
>  
> @@ -684,6 +682,7 @@ void do_close_on_exec(struct files_struct *files)
>  
>  	/* exec unshares first */
>  	spin_lock(&files->file_lock);
> +	files->min_fd = 0;
>  	for (i = 0; ; i++) {
>  		unsigned long set;
>  		unsigned fd = i * BITS_PER_LONG;
> @@ -865,6 +864,18 @@ bool get_close_on_exec(unsigned int fd)
>  	return res;
>  }
>  
> +unsigned int increase_min_fd(unsigned int num)
> +{
> +	struct files_struct *files = current->files;
> +	unsigned int old_min_fd;
> +
> +	spin_lock(&files->file_lock);
> +	old_min_fd = files->min_fd;
> +	files->min_fd += num;
> +	spin_unlock(&files->file_lock);
> +	return old_min_fd;
> +}
> +
>  static int do_dup2(struct files_struct *files,
>  	struct file *file, unsigned fd, unsigned flags)
>  __releases(&files->file_lock)
> diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
> index f07c55ea0c22..d1980443d8b3 100644
> --- a/include/linux/fdtable.h
> +++ b/include/linux/fdtable.h
> @@ -60,6 +60,7 @@ struct files_struct {
>     */
>  	spinlock_t file_lock ____cacheline_aligned_in_smp;
>  	unsigned int next_fd;
> +	unsigned int min_fd; /* min for "lowest available fd" allocation */
>  	unsigned long close_on_exec_init[1];
>  	unsigned long open_fds_init[1];
>  	unsigned long full_fds_bits_init[1];
> diff --git a/include/linux/file.h b/include/linux/file.h
> index 142d102f285e..b67986f818d2 100644
> --- a/include/linux/file.h
> +++ b/include/linux/file.h
> @@ -88,6 +88,7 @@ extern bool get_close_on_exec(unsigned int fd);
>  extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile);
>  extern int get_unused_fd_flags(unsigned flags);
>  extern void put_unused_fd(unsigned int fd);
> +extern unsigned int increase_min_fd(unsigned int num);
>  
>  extern void fd_install(unsigned int fd, struct file *file);
>  
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 07b4f8131e36..916327272d21 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -238,4 +238,7 @@ struct prctl_mm_map {
>  #define PR_SET_IO_FLUSHER		57
>  #define PR_GET_IO_FLUSHER		58
>  
> +/* Increase minimum file descriptor for "lowest available fd" allocation */
> +#define PR_INCREASE_MIN_FD		59
> +
>  #endif /* _LINUX_PRCTL_H */
> diff --git a/kernel/sys.c b/kernel/sys.c
> index d325f3ab624a..daa0ce43cecc 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2514,6 +2514,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
>  
>  		error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER;
>  		break;
> +	case PR_INCREASE_MIN_FD:
> +		if (arg3 || arg4 || arg5)
> +			return -EINVAL;
> +		error = increase_min_fd((unsigned int)arg2);
> +		break;
>  	default:
>  		error = -EINVAL;
>  		break;
> -- 
> 2.26.0
> 


-- 
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
<https://www.cyphar.com/>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

  reply	other threads:[~2020-04-08 12:01 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-08  6:56 [PATCH v3 0/3] Support userspace-selected fds Josh Triplett
2020-04-08  6:57 ` [PATCH v3 1/3] fs: Support setting a minimum fd for "lowest available fd" allocation Josh Triplett
2020-04-08 12:00   ` Aleksa Sarai [this message]
2020-04-09  3:17     ` Josh Triplett
2020-04-08  6:57 ` [PATCH v3 2/3] fs: openat2: Extend open_how to allow userspace-selected fds Josh Triplett
2020-04-08 12:23   ` Aleksa Sarai
2020-04-09  5:00     ` Josh Triplett
2020-04-09  8:10     ` Aleksa Sarai
2020-04-08  6:57 ` [PATCH v3 3/3] fs: pipe2: Support O_SPECIFIC_FD Josh Triplett
2020-04-08 12:26 ` [PATCH v3 0/3] Support userspace-selected fds Aleksa Sarai
2020-04-09  3:19   ` Josh Triplett
  -- strict thread matches above, loose matches on Subject: below --
2020-04-04  5:57 Josh Triplett
2020-04-04  5:58 ` [PATCH v3 1/3] fs: Support setting a minimum fd for "lowest available fd" allocation Josh Triplett

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200408120040.mtkqmymfazrv3lqk@yavin.dot.cyphar.com \
    --to=cyphar@cyphar.com \
    --cc=arnd@arndb.de \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=josh@joshtriplett.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.