linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Oleg Nesterov <oleg@redhat.com>, Jann Horn <jann@thejh.net>
Subject: [PATCH] files: rcu free files_struct
Date: Wed, 09 Dec 2020 12:04:38 -0600	[thread overview]
Message-ID: <87o8j2svnt.fsf_-_@x220.int.ebiederm.org> (raw)
In-Reply-To: <20201209142359.GN3579531@ZenIV.linux.org.uk> (Al Viro's message of "Wed, 9 Dec 2020 14:23:59 +0000")


This makes it safe to deference task->files with just rcu_read_lock
held, removing the need to take task_lock.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---

I have cleaned up my patch a little more and made this a little
more explicitly rcu.  I took a completley different approach to
documenting the use of rcu_head than we described that seems a little
more robust.

 fs/file.c               | 53 ++++++++++++++++++++++++++---------------
 include/linux/fdtable.h |  1 +
 kernel/fork.c           |  4 ++--
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 412033d8cfdf..88064f185560 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -379,7 +379,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
 	return NULL;
 }
 
-static struct fdtable *close_files(struct files_struct * files)
+static void close_files(struct files_struct * files)
 {
 	/*
 	 * It is safe to dereference the fd table without RCU or
@@ -397,8 +397,9 @@ static struct fdtable *close_files(struct files_struct * files)
 		set = fdt->open_fds[j++];
 		while (set) {
 			if (set & 1) {
-				struct file * file = xchg(&fdt->fd[i], NULL);
+				struct file * file = fdt->fd[i];
 				if (file) {
+					rcu_assign_pointer(fdt->fd[i], NULL);
 					filp_close(file, files);
 					cond_resched();
 				}
@@ -407,19 +408,35 @@ static struct fdtable *close_files(struct files_struct * files)
 			set >>= 1;
 		}
 	}
+}
 
-	return fdt;
+static void free_files_struct_rcu(struct rcu_head *rcu)
+{
+	struct files_struct *files =
+		container_of(rcu, struct files_struct, fdtab.rcu);
+	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+
+	/* free the arrays if they are not embedded */
+	if (fdt != &files->fdtab)
+		__free_fdtable(fdt);
+	kmem_cache_free(files_cachep, files);
 }
 
 void put_files_struct(struct files_struct *files)
 {
 	if (atomic_dec_and_test(&files->count)) {
-		struct fdtable *fdt = close_files(files);
-
-		/* free the arrays if they are not embedded */
-		if (fdt != &files->fdtab)
-			__free_fdtable(fdt);
-		kmem_cache_free(files_cachep, files);
+		close_files(files);
+		/*
+		 * The rules for using the rcu_head in fdtable:
+		 *
+		 * If the fdtable is being freed independently
+		 * of the files_struct fdtable->rcu is used.
+		 *
+		 * When the fdtable and files_struct are freed
+		 * together the rcu_head from the fdtable embedded in
+		 * files_struct is used.
+		 */
+		call_rcu(&files->fdtab.rcu, free_files_struct_rcu);
 	}
 }
 
@@ -822,12 +839,14 @@ EXPORT_SYMBOL(fget_raw);
 
 struct file *fget_task(struct task_struct *task, unsigned int fd)
 {
+	struct files_struct *files;
 	struct file *file = NULL;
 
-	task_lock(task);
-	if (task->files)
-		file = __fget_files(task->files, fd, 0, 1);
-	task_unlock(task);
+	rcu_read_lock();
+	files = rcu_dereference(task->files);
+	if (files)
+		file = __fget_files(files, fd, 0, 1);
+	rcu_read_unlock();
 
 	return file;
 }
@@ -838,11 +857,9 @@ struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
 	struct files_struct *files;
 	struct file *file = NULL;
 
-	task_lock(task);
-	files = task->files;
+	files = rcu_dereference(task->files);
 	if (files)
 		file = files_lookup_fd_rcu(files, fd);
-	task_unlock(task);
 
 	return file;
 }
@@ -854,8 +871,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
 	unsigned int fd = *ret_fd;
 	struct file *file = NULL;
 
-	task_lock(task);
-	files = task->files;
+	files = rcu_dereference(task->files);
 	if (files) {
 		for (; fd < files_fdtable(files)->max_fds; fd++) {
 			file = files_lookup_fd_rcu(files, fd);
@@ -863,7 +879,6 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
 				break;
 		}
 	}
-	task_unlock(task);
 	*ret_fd = fd;
 	return file;
 }
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index d0e78174874a..37dcface020f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -30,6 +30,7 @@ struct fdtable {
 	unsigned long *close_on_exec;
 	unsigned long *open_fds;
 	unsigned long *full_fds_bits;
+	/* Used for freeing fdtable and any containing files_struct */
 	struct rcu_head rcu;
 };
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d0ae6f827df..eca474a1586a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2982,7 +2982,7 @@ int ksys_unshare(unsigned long unshare_flags)
 
 		if (new_fd) {
 			fd = current->files;
-			current->files = new_fd;
+			rcu_assign_pointer(current->files, new_fd);
 			new_fd = fd;
 		}
 
@@ -3035,7 +3035,7 @@ int unshare_files(void)
 
 	old = task->files;
 	task_lock(task);
-	task->files = copy;
+	rcu_assign_pointer(task->files, copy);
 	task_unlock(task);
 	put_files_struct(old);
 	return 0;
-- 
2.20.1


  reply	other threads:[~2020-12-09 18:06 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <87r1on1v62.fsf@x220.int.ebiederm.org>
2020-11-20 23:14 ` [PATCH v2 01/24] exec: Move unshare_files to fix posix file locking during exec Eric W. Biederman
2020-11-20 23:58   ` Linus Torvalds
2020-12-07 22:22   ` Al Viro
     [not found]     ` <87zh2pusdw.fsf@x220.int.ebiederm.org>
2020-12-07 23:35       ` Al Viro
2020-11-20 23:14 ` [PATCH v2 02/24] exec: Simplify unshare_files Eric W. Biederman
2020-11-23 17:50   ` Oleg Nesterov
2020-11-23 18:25     ` Linus Torvalds
     [not found]       ` <87im9vx08i.fsf@x220.int.ebiederm.org>
     [not found]         ` <87pn42r0n7.fsf@x220.int.ebiederm.org>
2020-11-24 19:58           ` Linus Torvalds
2020-11-24 20:14             ` Arnd Bergmann
2020-11-24 23:44               ` Geoff Levand
2020-11-25  1:16                 ` Eric W. Biederman
2020-11-27 20:29                   ` Arnd Bergmann
2020-11-30 21:37                     ` Eric W. Biederman
2020-12-01  9:46                     ` Michael Ellerman
2020-11-25 21:51                 ` [RFC][PATCH] coredump: Document coredump code exclusively used by cell spufs Eric W. Biederman
2020-12-02 15:20                   ` Eric W. Biederman
2020-12-02 15:58                     ` Arnd Bergmann
2020-12-07 22:32       ` [PATCH v2 02/24] exec: Simplify unshare_files Al Viro
2020-11-20 23:14 ` [PATCH v2 03/24] exec: Remove reset_files_struct Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 04/24] kcmp: In kcmp_epoll_target use fget_task Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 05/24] bpf: In bpf_task_fd_query " Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 06/24] proc/fd: In proc_fd_link " Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 07/24] file: Rename __fcheck_files to files_lookup_fd_raw Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 08/24] file: Factor files_lookup_fd_locked out of fcheck_files Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 09/24] file: Replace fcheck_files with files_lookup_fd_rcu Eric W. Biederman
2020-12-07 22:46   ` Al Viro
2020-12-07 22:49     ` Al Viro
2020-12-09 16:54       ` Al Viro
2020-12-09 17:44         ` Paul E. McKenney
2020-11-20 23:14 ` [PATCH v2 10/24] file: Rename fcheck lookup_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 11/24] file: Implement task_lookup_fd_rcu Eric W. Biederman
2020-11-21 18:19   ` Cyrill Gorcunov
     [not found]     ` <87blfp1r8b.fsf@x220.int.ebiederm.org>
2020-11-22 13:52       ` Cyrill Gorcunov
2020-11-20 23:14 ` [PATCH v2 12/24] proc/fd: In tid_fd_mode use task_lookup_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 13/24] kcmp: In get_file_raw_ptr " Eric W. Biederman
2020-11-23 21:05   ` Cyrill Gorcunov
2020-11-20 23:14 ` [PATCH v2 14/24] file: Implement task_lookup_next_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 15/24] proc/fd: In proc_readfd_common use task_lookup_next_fd_rcu Eric W. Biederman
2020-12-07 23:29   ` Al Viro
     [not found]     ` <877dprvs8e.fsf@x220.int.ebiederm.org>
2020-12-09  4:07       ` Al Viro
     [not found]         ` <877dprtxly.fsf@x220.int.ebiederm.org>
2020-12-09 14:23           ` Al Viro
2020-12-09 18:04             ` Eric W. Biederman [this message]
2020-12-09 19:13               ` [PATCH] files: rcu free files_struct Linus Torvalds
2020-12-09 19:50                 ` Al Viro
2020-12-09 21:32                   ` Eric W. Biederman
2020-12-10  6:13                     ` Al Viro
2020-12-10 19:29                       ` Eric W. Biederman
2020-12-10 21:36                         ` Al Viro
2020-12-10 22:30                           ` Christian Brauner
2020-12-10 22:54                             ` Al Viro
2020-12-10 23:10                               ` Al Viro
2020-12-09 21:42                   ` [PATCH -1/24] exec: Don't open code get_close_on_exec Eric W. Biederman
2020-12-09 22:04                 ` [PATCH] files: rcu free files_struct Eric W. Biederman
2020-12-09 19:49               ` Matthew Wilcox
2020-12-09 22:06                 ` Eric W. Biederman
2020-12-09 22:58                 ` Al Viro
2020-12-09 23:01                   ` Linus Torvalds
2020-12-09 23:04                     ` Linus Torvalds
2020-12-09 23:07                     ` Matthew Wilcox
2020-12-09 23:26                       ` Paul E. McKenney
2020-12-09 23:29                       ` Linus Torvalds
2020-12-10  0:39                         ` Paul E. McKenney
2020-12-10  0:41                           ` Linus Torvalds
2020-12-10  0:57                             ` Paul E. McKenney
2020-11-20 23:14 ` [PATCH v2 16/24] bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu Eric W. Biederman
2020-11-23 17:06   ` Yonghong Song
2020-11-20 23:14 ` [PATCH v2 17/24] proc/fd: In fdinfo seq_show don't use get_files_struct Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 18/24] file: Merge __fd_install into fd_install Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 19/24] file: In f_dupfd read RLIMIT_NOFILE once Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 20/24] file: Merge __alloc_fd into alloc_fd Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 21/24] file: Rename __close_fd to close_fd and remove the files parameter Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 22/24] file: Replace ksys_close with close_fd Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 23/24] file: Rename __close_fd_get_file close_fd_get_file Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 24/24] file: Remove get_files_struct Eric W. Biederman
2020-11-21  0:05 ` [PATCH v2 00/24] exec: Move unshare_files and guarantee files_struct.count is correct Linus Torvalds
2020-11-28  5:12   ` Al Viro
2020-12-07 23:56     ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87o8j2svnt.fsf_-_@x220.int.ebiederm.org \
    --to=ebiederm@xmission.com \
    --cc=christian.brauner@ubuntu.com \
    --cc=jann@thejh.net \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).