From: ebiederm@xmission.com (Eric W. Biederman)
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
Linus Torvalds <torvalds@linux-foundation.org>,
Christian Brauner <christian.brauner@ubuntu.com>,
Oleg Nesterov <oleg@redhat.com>, Jann Horn <jann@thejh.net>
Subject: [PATCH] files: rcu free files_struct
Date: Wed, 09 Dec 2020 12:04:38 -0600 [thread overview]
Message-ID: <87o8j2svnt.fsf_-_@x220.int.ebiederm.org> (raw)
In-Reply-To: <20201209142359.GN3579531@ZenIV.linux.org.uk> (Al Viro's message of "Wed, 9 Dec 2020 14:23:59 +0000")
This makes it safe to deference task->files with just rcu_read_lock
held, removing the need to take task_lock.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
I have cleaned up my patch a little more and made this a little
more explicitly rcu. I took a completley different approach to
documenting the use of rcu_head than we described that seems a little
more robust.
fs/file.c | 53 ++++++++++++++++++++++++++---------------
include/linux/fdtable.h | 1 +
kernel/fork.c | 4 ++--
3 files changed, 37 insertions(+), 21 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index 412033d8cfdf..88064f185560 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -379,7 +379,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
return NULL;
}
-static struct fdtable *close_files(struct files_struct * files)
+static void close_files(struct files_struct * files)
{
/*
* It is safe to dereference the fd table without RCU or
@@ -397,8 +397,9 @@ static struct fdtable *close_files(struct files_struct * files)
set = fdt->open_fds[j++];
while (set) {
if (set & 1) {
- struct file * file = xchg(&fdt->fd[i], NULL);
+ struct file * file = fdt->fd[i];
if (file) {
+ rcu_assign_pointer(fdt->fd[i], NULL);
filp_close(file, files);
cond_resched();
}
@@ -407,19 +408,35 @@ static struct fdtable *close_files(struct files_struct * files)
set >>= 1;
}
}
+}
- return fdt;
+static void free_files_struct_rcu(struct rcu_head *rcu)
+{
+ struct files_struct *files =
+ container_of(rcu, struct files_struct, fdtab.rcu);
+ struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+
+ /* free the arrays if they are not embedded */
+ if (fdt != &files->fdtab)
+ __free_fdtable(fdt);
+ kmem_cache_free(files_cachep, files);
}
void put_files_struct(struct files_struct *files)
{
if (atomic_dec_and_test(&files->count)) {
- struct fdtable *fdt = close_files(files);
-
- /* free the arrays if they are not embedded */
- if (fdt != &files->fdtab)
- __free_fdtable(fdt);
- kmem_cache_free(files_cachep, files);
+ close_files(files);
+ /*
+ * The rules for using the rcu_head in fdtable:
+ *
+ * If the fdtable is being freed independently
+ * of the files_struct fdtable->rcu is used.
+ *
+ * When the fdtable and files_struct are freed
+ * together the rcu_head from the fdtable embedded in
+ * files_struct is used.
+ */
+ call_rcu(&files->fdtab.rcu, free_files_struct_rcu);
}
}
@@ -822,12 +839,14 @@ EXPORT_SYMBOL(fget_raw);
struct file *fget_task(struct task_struct *task, unsigned int fd)
{
+ struct files_struct *files;
struct file *file = NULL;
- task_lock(task);
- if (task->files)
- file = __fget_files(task->files, fd, 0, 1);
- task_unlock(task);
+ rcu_read_lock();
+ files = rcu_dereference(task->files);
+ if (files)
+ file = __fget_files(files, fd, 0, 1);
+ rcu_read_unlock();
return file;
}
@@ -838,11 +857,9 @@ struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
struct files_struct *files;
struct file *file = NULL;
- task_lock(task);
- files = task->files;
+ files = rcu_dereference(task->files);
if (files)
file = files_lookup_fd_rcu(files, fd);
- task_unlock(task);
return file;
}
@@ -854,8 +871,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
unsigned int fd = *ret_fd;
struct file *file = NULL;
- task_lock(task);
- files = task->files;
+ files = rcu_dereference(task->files);
if (files) {
for (; fd < files_fdtable(files)->max_fds; fd++) {
file = files_lookup_fd_rcu(files, fd);
@@ -863,7 +879,6 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
break;
}
}
- task_unlock(task);
*ret_fd = fd;
return file;
}
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index d0e78174874a..37dcface020f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -30,6 +30,7 @@ struct fdtable {
unsigned long *close_on_exec;
unsigned long *open_fds;
unsigned long *full_fds_bits;
+ /* Used for freeing fdtable and any containing files_struct */
struct rcu_head rcu;
};
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d0ae6f827df..eca474a1586a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2982,7 +2982,7 @@ int ksys_unshare(unsigned long unshare_flags)
if (new_fd) {
fd = current->files;
- current->files = new_fd;
+ rcu_assign_pointer(current->files, new_fd);
new_fd = fd;
}
@@ -3035,7 +3035,7 @@ int unshare_files(void)
old = task->files;
task_lock(task);
- task->files = copy;
+ rcu_assign_pointer(task->files, copy);
task_unlock(task);
put_files_struct(old);
return 0;
--
2.20.1
next prev parent reply other threads:[~2020-12-09 18:06 UTC|newest]
Thread overview: 77+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <87r1on1v62.fsf@x220.int.ebiederm.org>
2020-11-20 23:14 ` [PATCH v2 01/24] exec: Move unshare_files to fix posix file locking during exec Eric W. Biederman
2020-11-20 23:58 ` Linus Torvalds
2020-12-07 22:22 ` Al Viro
[not found] ` <87zh2pusdw.fsf@x220.int.ebiederm.org>
2020-12-07 23:35 ` Al Viro
2020-11-20 23:14 ` [PATCH v2 02/24] exec: Simplify unshare_files Eric W. Biederman
2020-11-23 17:50 ` Oleg Nesterov
2020-11-23 18:25 ` Linus Torvalds
[not found] ` <87im9vx08i.fsf@x220.int.ebiederm.org>
[not found] ` <87pn42r0n7.fsf@x220.int.ebiederm.org>
2020-11-24 19:58 ` Linus Torvalds
2020-11-24 20:14 ` Arnd Bergmann
2020-11-24 23:44 ` Geoff Levand
2020-11-25 1:16 ` Eric W. Biederman
2020-11-27 20:29 ` Arnd Bergmann
2020-11-30 21:37 ` Eric W. Biederman
2020-12-01 9:46 ` Michael Ellerman
2020-11-25 21:51 ` [RFC][PATCH] coredump: Document coredump code exclusively used by cell spufs Eric W. Biederman
2020-12-02 15:20 ` Eric W. Biederman
2020-12-02 15:58 ` Arnd Bergmann
2020-12-07 22:32 ` [PATCH v2 02/24] exec: Simplify unshare_files Al Viro
2020-11-20 23:14 ` [PATCH v2 03/24] exec: Remove reset_files_struct Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 04/24] kcmp: In kcmp_epoll_target use fget_task Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 05/24] bpf: In bpf_task_fd_query " Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 06/24] proc/fd: In proc_fd_link " Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 07/24] file: Rename __fcheck_files to files_lookup_fd_raw Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 08/24] file: Factor files_lookup_fd_locked out of fcheck_files Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 09/24] file: Replace fcheck_files with files_lookup_fd_rcu Eric W. Biederman
2020-12-07 22:46 ` Al Viro
2020-12-07 22:49 ` Al Viro
2020-12-09 16:54 ` Al Viro
2020-12-09 17:44 ` Paul E. McKenney
2020-11-20 23:14 ` [PATCH v2 10/24] file: Rename fcheck lookup_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 11/24] file: Implement task_lookup_fd_rcu Eric W. Biederman
2020-11-21 18:19 ` Cyrill Gorcunov
[not found] ` <87blfp1r8b.fsf@x220.int.ebiederm.org>
2020-11-22 13:52 ` Cyrill Gorcunov
2020-11-20 23:14 ` [PATCH v2 12/24] proc/fd: In tid_fd_mode use task_lookup_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 13/24] kcmp: In get_file_raw_ptr " Eric W. Biederman
2020-11-23 21:05 ` Cyrill Gorcunov
2020-11-20 23:14 ` [PATCH v2 14/24] file: Implement task_lookup_next_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 15/24] proc/fd: In proc_readfd_common use task_lookup_next_fd_rcu Eric W. Biederman
2020-12-07 23:29 ` Al Viro
[not found] ` <877dprvs8e.fsf@x220.int.ebiederm.org>
2020-12-09 4:07 ` Al Viro
[not found] ` <877dprtxly.fsf@x220.int.ebiederm.org>
2020-12-09 14:23 ` Al Viro
2020-12-09 18:04 ` Eric W. Biederman [this message]
2020-12-09 19:13 ` [PATCH] files: rcu free files_struct Linus Torvalds
2020-12-09 19:50 ` Al Viro
2020-12-09 21:32 ` Eric W. Biederman
2020-12-10 6:13 ` Al Viro
2020-12-10 19:29 ` Eric W. Biederman
2020-12-10 21:36 ` Al Viro
2020-12-10 22:30 ` Christian Brauner
2020-12-10 22:54 ` Al Viro
2020-12-10 23:10 ` Al Viro
2020-12-09 21:42 ` [PATCH -1/24] exec: Don't open code get_close_on_exec Eric W. Biederman
2020-12-09 22:04 ` [PATCH] files: rcu free files_struct Eric W. Biederman
2020-12-09 19:49 ` Matthew Wilcox
2020-12-09 22:06 ` Eric W. Biederman
2020-12-09 22:58 ` Al Viro
2020-12-09 23:01 ` Linus Torvalds
2020-12-09 23:04 ` Linus Torvalds
2020-12-09 23:07 ` Matthew Wilcox
2020-12-09 23:26 ` Paul E. McKenney
2020-12-09 23:29 ` Linus Torvalds
2020-12-10 0:39 ` Paul E. McKenney
2020-12-10 0:41 ` Linus Torvalds
2020-12-10 0:57 ` Paul E. McKenney
2020-11-20 23:14 ` [PATCH v2 16/24] bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu Eric W. Biederman
2020-11-23 17:06 ` Yonghong Song
2020-11-20 23:14 ` [PATCH v2 17/24] proc/fd: In fdinfo seq_show don't use get_files_struct Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 18/24] file: Merge __fd_install into fd_install Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 19/24] file: In f_dupfd read RLIMIT_NOFILE once Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 20/24] file: Merge __alloc_fd into alloc_fd Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 21/24] file: Rename __close_fd to close_fd and remove the files parameter Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 22/24] file: Replace ksys_close with close_fd Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 23/24] file: Rename __close_fd_get_file close_fd_get_file Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 24/24] file: Remove get_files_struct Eric W. Biederman
2020-11-21 0:05 ` [PATCH v2 00/24] exec: Move unshare_files and guarantee files_struct.count is correct Linus Torvalds
2020-11-28 5:12 ` Al Viro
2020-12-07 23:56 ` Al Viro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87o8j2svnt.fsf_-_@x220.int.ebiederm.org \
--to=ebiederm@xmission.com \
--cc=christian.brauner@ubuntu.com \
--cc=jann@thejh.net \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=oleg@redhat.com \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).