All of lore.kernel.org
 help / color / mirror / Atom feed
From: ebiederm@xmission.com (Eric W. Biederman)
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Oleg Nesterov <oleg@redhat.com>, Jann Horn <jann@thejh.net>
Subject: [PATCH] files: rcu free files_struct
Date: Wed, 09 Dec 2020 12:04:38 -0600	[thread overview]
Message-ID: <87o8j2svnt.fsf_-_@x220.int.ebiederm.org> (raw)
In-Reply-To: <20201209142359.GN3579531@ZenIV.linux.org.uk> (Al Viro's message of "Wed, 9 Dec 2020 14:23:59 +0000")


This makes it safe to deference task->files with just rcu_read_lock
held, removing the need to take task_lock.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---

I have cleaned up my patch a little more and made this a little
more explicitly rcu.  I took a completley different approach to
documenting the use of rcu_head than we described that seems a little
more robust.

 fs/file.c               | 53 ++++++++++++++++++++++++++---------------
 include/linux/fdtable.h |  1 +
 kernel/fork.c           |  4 ++--
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 412033d8cfdf..88064f185560 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -379,7 +379,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
 	return NULL;
 }
 
-static struct fdtable *close_files(struct files_struct * files)
+static void close_files(struct files_struct * files)
 {
 	/*
 	 * It is safe to dereference the fd table without RCU or
@@ -397,8 +397,9 @@ static struct fdtable *close_files(struct files_struct * files)
 		set = fdt->open_fds[j++];
 		while (set) {
 			if (set & 1) {
-				struct file * file = xchg(&fdt->fd[i], NULL);
+				struct file * file = fdt->fd[i];
 				if (file) {
+					rcu_assign_pointer(fdt->fd[i], NULL);
 					filp_close(file, files);
 					cond_resched();
 				}
@@ -407,19 +408,35 @@ static struct fdtable *close_files(struct files_struct * files)
 			set >>= 1;
 		}
 	}
+}
 
-	return fdt;
+static void free_files_struct_rcu(struct rcu_head *rcu)
+{
+	struct files_struct *files =
+		container_of(rcu, struct files_struct, fdtab.rcu);
+	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+
+	/* free the arrays if they are not embedded */
+	if (fdt != &files->fdtab)
+		__free_fdtable(fdt);
+	kmem_cache_free(files_cachep, files);
 }
 
 void put_files_struct(struct files_struct *files)
 {
 	if (atomic_dec_and_test(&files->count)) {
-		struct fdtable *fdt = close_files(files);
-
-		/* free the arrays if they are not embedded */
-		if (fdt != &files->fdtab)
-			__free_fdtable(fdt);
-		kmem_cache_free(files_cachep, files);
+		close_files(files);
+		/*
+		 * The rules for using the rcu_head in fdtable:
+		 *
+		 * If the fdtable is being freed independently
+		 * of the files_struct fdtable->rcu is used.
+		 *
+		 * When the fdtable and files_struct are freed
+		 * together the rcu_head from the fdtable embedded in
+		 * files_struct is used.
+		 */
+		call_rcu(&files->fdtab.rcu, free_files_struct_rcu);
 	}
 }
 
@@ -822,12 +839,14 @@ EXPORT_SYMBOL(fget_raw);
 
 struct file *fget_task(struct task_struct *task, unsigned int fd)
 {
+	struct files_struct *files;
 	struct file *file = NULL;
 
-	task_lock(task);
-	if (task->files)
-		file = __fget_files(task->files, fd, 0, 1);
-	task_unlock(task);
+	rcu_read_lock();
+	files = rcu_dereference(task->files);
+	if (files)
+		file = __fget_files(files, fd, 0, 1);
+	rcu_read_unlock();
 
 	return file;
 }
@@ -838,11 +857,9 @@ struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
 	struct files_struct *files;
 	struct file *file = NULL;
 
-	task_lock(task);
-	files = task->files;
+	files = rcu_dereference(task->files);
 	if (files)
 		file = files_lookup_fd_rcu(files, fd);
-	task_unlock(task);
 
 	return file;
 }
@@ -854,8 +871,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
 	unsigned int fd = *ret_fd;
 	struct file *file = NULL;
 
-	task_lock(task);
-	files = task->files;
+	files = rcu_dereference(task->files);
 	if (files) {
 		for (; fd < files_fdtable(files)->max_fds; fd++) {
 			file = files_lookup_fd_rcu(files, fd);
@@ -863,7 +879,6 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
 				break;
 		}
 	}
-	task_unlock(task);
 	*ret_fd = fd;
 	return file;
 }
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index d0e78174874a..37dcface020f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -30,6 +30,7 @@ struct fdtable {
 	unsigned long *close_on_exec;
 	unsigned long *open_fds;
 	unsigned long *full_fds_bits;
+	/* Used for freeing fdtable and any containing files_struct */
 	struct rcu_head rcu;
 };
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d0ae6f827df..eca474a1586a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2982,7 +2982,7 @@ int ksys_unshare(unsigned long unshare_flags)
 
 		if (new_fd) {
 			fd = current->files;
-			current->files = new_fd;
+			rcu_assign_pointer(current->files, new_fd);
 			new_fd = fd;
 		}
 
@@ -3035,7 +3035,7 @@ int unshare_files(void)
 
 	old = task->files;
 	task_lock(task);
-	task->files = copy;
+	rcu_assign_pointer(task->files, copy);
 	task_unlock(task);
 	put_files_struct(old);
 	return 0;
-- 
2.20.1


  reply	other threads:[~2020-12-09 18:06 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <87r1on1v62.fsf@x220.int.ebiederm.org>
2020-11-20 23:14 ` [PATCH v2 01/24] exec: Move unshare_files to fix posix file locking during exec Eric W. Biederman
2020-11-20 23:58   ` Linus Torvalds
2020-12-07 22:22   ` Al Viro
     [not found]     ` <87zh2pusdw.fsf@x220.int.ebiederm.org>
2020-12-07 23:35       ` Al Viro
2020-11-20 23:14 ` [PATCH v2 02/24] exec: Simplify unshare_files Eric W. Biederman
2020-11-23 17:50   ` Oleg Nesterov
2020-11-23 18:25     ` Linus Torvalds
     [not found]       ` <87im9vx08i.fsf@x220.int.ebiederm.org>
     [not found]         ` <87pn42r0n7.fsf@x220.int.ebiederm.org>
2020-11-24 19:58           ` Linus Torvalds
2020-11-24 20:14             ` Arnd Bergmann
2020-11-24 23:44               ` Geoff Levand
2020-11-25  1:16                 ` Eric W. Biederman
2020-11-27 20:29                   ` Arnd Bergmann
2020-11-30 21:37                     ` Eric W. Biederman
2020-12-01  9:46                     ` Michael Ellerman
2020-11-25 21:51                 ` [RFC][PATCH] coredump: Document coredump code exclusively used by cell spufs Eric W. Biederman
2020-12-02 15:20                   ` Eric W. Biederman
2020-12-02 15:58                     ` Arnd Bergmann
2020-12-07 22:32       ` [PATCH v2 02/24] exec: Simplify unshare_files Al Viro
2020-11-20 23:14 ` [PATCH v2 03/24] exec: Remove reset_files_struct Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 04/24] kcmp: In kcmp_epoll_target use fget_task Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 05/24] bpf: In bpf_task_fd_query " Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 06/24] proc/fd: In proc_fd_link " Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 07/24] file: Rename __fcheck_files to files_lookup_fd_raw Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 08/24] file: Factor files_lookup_fd_locked out of fcheck_files Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 09/24] file: Replace fcheck_files with files_lookup_fd_rcu Eric W. Biederman
2020-12-07 22:46   ` Al Viro
2020-12-07 22:49     ` Al Viro
2020-12-09 16:54       ` Al Viro
2020-12-09 17:44         ` Paul E. McKenney
2020-11-20 23:14 ` [PATCH v2 10/24] file: Rename fcheck lookup_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 11/24] file: Implement task_lookup_fd_rcu Eric W. Biederman
2020-11-21 18:19   ` Cyrill Gorcunov
     [not found]     ` <87blfp1r8b.fsf@x220.int.ebiederm.org>
2020-11-22 13:52       ` Cyrill Gorcunov
2020-11-20 23:14 ` [PATCH v2 12/24] proc/fd: In tid_fd_mode use task_lookup_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 13/24] kcmp: In get_file_raw_ptr " Eric W. Biederman
2020-11-23 21:05   ` Cyrill Gorcunov
2020-11-20 23:14 ` [PATCH v2 14/24] file: Implement task_lookup_next_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 15/24] proc/fd: In proc_readfd_common use task_lookup_next_fd_rcu Eric W. Biederman
2020-12-07 23:29   ` Al Viro
     [not found]     ` <877dprvs8e.fsf@x220.int.ebiederm.org>
2020-12-09  4:07       ` Al Viro
     [not found]         ` <877dprtxly.fsf@x220.int.ebiederm.org>
2020-12-09 14:23           ` Al Viro
2020-12-09 18:04             ` Eric W. Biederman [this message]
2020-12-09 19:13               ` [PATCH] files: rcu free files_struct Linus Torvalds
2020-12-09 19:50                 ` Al Viro
2020-12-09 21:32                   ` Eric W. Biederman
2020-12-10  6:13                     ` Al Viro
2020-12-10 19:29                       ` Eric W. Biederman
2020-12-10 21:36                         ` Al Viro
2020-12-10 22:30                           ` Christian Brauner
2020-12-10 22:54                             ` Al Viro
2020-12-10 23:10                               ` Al Viro
2020-12-09 21:42                   ` [PATCH -1/24] exec: Don't open code get_close_on_exec Eric W. Biederman
2020-12-09 22:04                 ` [PATCH] files: rcu free files_struct Eric W. Biederman
2020-12-09 19:49               ` Matthew Wilcox
2020-12-09 22:06                 ` Eric W. Biederman
2020-12-09 22:58                 ` Al Viro
2020-12-09 23:01                   ` Linus Torvalds
2020-12-09 23:04                     ` Linus Torvalds
2020-12-09 23:07                     ` Matthew Wilcox
2020-12-09 23:26                       ` Paul E. McKenney
2020-12-09 23:29                       ` Linus Torvalds
2020-12-10  0:39                         ` Paul E. McKenney
2020-12-10  0:41                           ` Linus Torvalds
2020-12-10  0:57                             ` Paul E. McKenney
2020-11-20 23:14 ` [PATCH v2 16/24] bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu Eric W. Biederman
2020-11-23 17:06   ` Yonghong Song
2020-11-20 23:14 ` [PATCH v2 17/24] proc/fd: In fdinfo seq_show don't use get_files_struct Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 18/24] file: Merge __fd_install into fd_install Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 19/24] file: In f_dupfd read RLIMIT_NOFILE once Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 20/24] file: Merge __alloc_fd into alloc_fd Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 21/24] file: Rename __close_fd to close_fd and remove the files parameter Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 22/24] file: Replace ksys_close with close_fd Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 23/24] file: Rename __close_fd_get_file close_fd_get_file Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 24/24] file: Remove get_files_struct Eric W. Biederman
2020-11-21  0:05 ` [PATCH v2 00/24] exec: Move unshare_files and guarantee files_struct.count is correct Linus Torvalds
2020-11-28  5:12   ` Al Viro
2020-12-07 23:56     ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87o8j2svnt.fsf_-_@x220.int.ebiederm.org \
    --to=ebiederm@xmission.com \
    --cc=christian.brauner@ubuntu.com \
    --cc=jann@thejh.net \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.