linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Eric W. Biederman" <ebiederm@xmission.com>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, criu@openvz.org,
	bpf@vger.kernel.org,
	"Linus Torvalds" <torvalds@linux-foundation.org>,
	"Alexander Viro" <viro@zeniv.linux.org.uk>,
	"Christian Brauner" <christian.brauner@ubuntu.com>,
	"Oleg Nesterov" <oleg@redhat.com>,
	"Cyrill Gorcunov" <gorcunov@gmail.com>,
	"Jann Horn" <jann@thejh.net>, "Kees Cook" <keescook@chromium.org>,
	"Daniel P . Berrangé" <berrange@redhat.com>,
	"Jeff Layton" <jlayton@redhat.com>,
	"Miklos Szeredi" <miklos@szeredi.hu>,
	"Matthew Wilcox" <willy@infradead.org>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	"Trond Myklebust" <trond.myklebust@hammerspace.com>,
	"Chris Wright" <chrisw@redhat.com>,
	"Alexei Starovoitov" <ast@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Martin KaFai Lau" <kafai@fb.com>,
	"Song Liu" <songliubraving@fb.com>, "Yonghong Song" <yhs@fb.com>,
	"Andrii Nakryiko" <andriin@fb.com>,
	"John Fastabend" <john.fastabend@gmail.com>,
	"KP Singh" <kpsingh@chromium.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>
Subject: [PATCH v2 16/24] bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu
Date: Fri, 20 Nov 2020 17:14:33 -0600	[thread overview]
Message-ID: <20201120231441.29911-16-ebiederm@xmission.com> (raw)
In-Reply-To: <87r1on1v62.fsf@x220.int.ebiederm.org>

When discussing[1] exec and posix file locks it was realized that none
of the callers of get_files_struct fundamentally needed to call
get_files_struct, and that by switching them to helper functions
instead it will both simplify their code and remove unnecessary
increments of files_struct.count.  Those unnecessary increments can
result in exec unnecessarily unsharing files_struct which breaking
posix locks, and it can result in fget_light having to fallback to
fget reducing system performance.

Using task_lookup_next_fd_rcu simplifies task_file_seq_get_next, by
moving the checking for the maximum file descritor into the generic
code, and by remvoing the need for capturing and releasing a reference
on files_struct.  As the reference count of files_struct no longer
needs to be maintained bpf_iter_seq_task_file_info can have it's files
member removed and task_file_seq_get_next no longer needs it's fstruct
argument.

The curr_fd local variable does need to become unsigned to be used
with fnext_task.  As curr_fd is assigned from and assigned a u32
making curr_fd an unsigned int won't cause problems and might prevent
them.

[1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com
Suggested-by: Oleg Nesterov <oleg@redhat.com>
v1: https://lkml.kernel.org/r/20200817220425.9389-11-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/bpf/task_iter.c | 44 ++++++++++--------------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 5ab2ccfb96cb..4ec63170c741 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -130,45 +130,33 @@ struct bpf_iter_seq_task_file_info {
 	 */
 	struct bpf_iter_seq_task_common common;
 	struct task_struct *task;
-	struct files_struct *files;
 	u32 tid;
 	u32 fd;
 };
 
 static struct file *
 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
-		       struct task_struct **task, struct files_struct **fstruct)
+		       struct task_struct **task)
 {
 	struct pid_namespace *ns = info->common.ns;
-	u32 curr_tid = info->tid, max_fds;
-	struct files_struct *curr_files;
+	u32 curr_tid = info->tid;
 	struct task_struct *curr_task;
-	int curr_fd = info->fd;
+	unsigned int curr_fd = info->fd;
 
 	/* If this function returns a non-NULL file object,
-	 * it held a reference to the task/files_struct/file.
+	 * it held a reference to the task/file.
 	 * Otherwise, it does not hold any reference.
 	 */
 again:
 	if (*task) {
 		curr_task = *task;
-		curr_files = *fstruct;
 		curr_fd = info->fd;
 	} else {
 		curr_task = task_seq_get_next(ns, &curr_tid, true);
 		if (!curr_task)
 			return NULL;
 
-		curr_files = get_files_struct(curr_task);
-		if (!curr_files) {
-			put_task_struct(curr_task);
-			curr_tid = ++(info->tid);
-			info->fd = 0;
-			goto again;
-		}
-
-		/* set *fstruct, *task and info->tid */
-		*fstruct = curr_files;
+		/* set *task and info->tid */
 		*task = curr_task;
 		if (curr_tid == info->tid) {
 			curr_fd = info->fd;
@@ -179,13 +167,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
 	}
 
 	rcu_read_lock();
-	max_fds = files_fdtable(curr_files)->max_fds;
-	for (; curr_fd < max_fds; curr_fd++) {
+	for (;; curr_fd++) {
 		struct file *f;
-
-		f = files_lookup_fd_rcu(curr_files, curr_fd);
+		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
 		if (!f)
-			continue;
+			break;
 		if (!get_file_rcu(f))
 			continue;
 
@@ -197,10 +183,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
 
 	/* the current task is done, go to the next task */
 	rcu_read_unlock();
-	put_files_struct(curr_files);
 	put_task_struct(curr_task);
 	*task = NULL;
-	*fstruct = NULL;
 	info->fd = 0;
 	curr_tid = ++(info->tid);
 	goto again;
@@ -209,13 +193,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
 static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct bpf_iter_seq_task_file_info *info = seq->private;
-	struct files_struct *files = NULL;
 	struct task_struct *task = NULL;
 	struct file *file;
 
-	file = task_file_seq_get_next(info, &task, &files);
+	file = task_file_seq_get_next(info, &task);
 	if (!file) {
-		info->files = NULL;
 		info->task = NULL;
 		return NULL;
 	}
@@ -223,7 +205,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
 	if (*pos == 0)
 		++*pos;
 	info->task = task;
-	info->files = files;
 
 	return file;
 }
@@ -231,22 +212,19 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
 static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct bpf_iter_seq_task_file_info *info = seq->private;
-	struct files_struct *files = info->files;
 	struct task_struct *task = info->task;
 	struct file *file;
 
 	++*pos;
 	++info->fd;
 	fput((struct file *)v);
-	file = task_file_seq_get_next(info, &task, &files);
+	file = task_file_seq_get_next(info, &task);
 	if (!file) {
-		info->files = NULL;
 		info->task = NULL;
 		return NULL;
 	}
 
 	info->task = task;
-	info->files = files;
 
 	return file;
 }
@@ -295,9 +273,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v)
 		(void)__task_file_seq_show(seq, v, true);
 	} else {
 		fput((struct file *)v);
-		put_files_struct(info->files);
 		put_task_struct(info->task);
-		info->files = NULL;
 		info->task = NULL;
 	}
 }
-- 
2.25.0


  parent reply	other threads:[~2020-11-20 23:20 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <87r1on1v62.fsf@x220.int.ebiederm.org>
2020-11-20 23:14 ` [PATCH v2 01/24] exec: Move unshare_files to fix posix file locking during exec Eric W. Biederman
2020-11-20 23:58   ` Linus Torvalds
2020-12-07 22:22   ` Al Viro
     [not found]     ` <87zh2pusdw.fsf@x220.int.ebiederm.org>
2020-12-07 23:35       ` Al Viro
2020-11-20 23:14 ` [PATCH v2 02/24] exec: Simplify unshare_files Eric W. Biederman
2020-11-23 17:50   ` Oleg Nesterov
2020-11-23 18:25     ` Linus Torvalds
     [not found]       ` <87im9vx08i.fsf@x220.int.ebiederm.org>
     [not found]         ` <87pn42r0n7.fsf@x220.int.ebiederm.org>
2020-11-24 19:58           ` Linus Torvalds
2020-11-24 20:14             ` Arnd Bergmann
2020-11-24 23:44               ` Geoff Levand
2020-11-25  1:16                 ` Eric W. Biederman
2020-11-27 20:29                   ` Arnd Bergmann
2020-11-30 21:37                     ` Eric W. Biederman
2020-12-01  9:46                     ` Michael Ellerman
2020-11-25 21:51                 ` [RFC][PATCH] coredump: Document coredump code exclusively used by cell spufs Eric W. Biederman
2020-12-02 15:20                   ` Eric W. Biederman
2020-12-02 15:58                     ` Arnd Bergmann
2020-12-07 22:32       ` [PATCH v2 02/24] exec: Simplify unshare_files Al Viro
2020-11-20 23:14 ` [PATCH v2 03/24] exec: Remove reset_files_struct Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 04/24] kcmp: In kcmp_epoll_target use fget_task Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 05/24] bpf: In bpf_task_fd_query " Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 06/24] proc/fd: In proc_fd_link " Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 07/24] file: Rename __fcheck_files to files_lookup_fd_raw Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 08/24] file: Factor files_lookup_fd_locked out of fcheck_files Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 09/24] file: Replace fcheck_files with files_lookup_fd_rcu Eric W. Biederman
2020-12-07 22:46   ` Al Viro
2020-12-07 22:49     ` Al Viro
2020-12-09 16:54       ` Al Viro
2020-12-09 17:44         ` Paul E. McKenney
2020-11-20 23:14 ` [PATCH v2 10/24] file: Rename fcheck lookup_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 11/24] file: Implement task_lookup_fd_rcu Eric W. Biederman
2020-11-21 18:19   ` Cyrill Gorcunov
     [not found]     ` <87blfp1r8b.fsf@x220.int.ebiederm.org>
2020-11-22 13:52       ` Cyrill Gorcunov
2020-11-20 23:14 ` [PATCH v2 12/24] proc/fd: In tid_fd_mode use task_lookup_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 13/24] kcmp: In get_file_raw_ptr " Eric W. Biederman
2020-11-23 21:05   ` Cyrill Gorcunov
2020-11-20 23:14 ` [PATCH v2 14/24] file: Implement task_lookup_next_fd_rcu Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 15/24] proc/fd: In proc_readfd_common use task_lookup_next_fd_rcu Eric W. Biederman
2020-12-07 23:29   ` Al Viro
     [not found]     ` <877dprvs8e.fsf@x220.int.ebiederm.org>
2020-12-09  4:07       ` Al Viro
     [not found]         ` <877dprtxly.fsf@x220.int.ebiederm.org>
2020-12-09 14:23           ` Al Viro
2020-12-09 18:04             ` [PATCH] files: rcu free files_struct Eric W. Biederman
2020-12-09 19:13               ` Linus Torvalds
2020-12-09 19:50                 ` Al Viro
2020-12-09 21:32                   ` Eric W. Biederman
2020-12-10  6:13                     ` Al Viro
2020-12-10 19:29                       ` Eric W. Biederman
2020-12-10 21:36                         ` Al Viro
2020-12-10 22:30                           ` Christian Brauner
2020-12-10 22:54                             ` Al Viro
2020-12-10 23:10                               ` Al Viro
2020-12-09 21:42                   ` [PATCH -1/24] exec: Don't open code get_close_on_exec Eric W. Biederman
2020-12-09 22:04                 ` [PATCH] files: rcu free files_struct Eric W. Biederman
2020-12-09 19:49               ` Matthew Wilcox
2020-12-09 22:06                 ` Eric W. Biederman
2020-12-09 22:58                 ` Al Viro
2020-12-09 23:01                   ` Linus Torvalds
2020-12-09 23:04                     ` Linus Torvalds
2020-12-09 23:07                     ` Matthew Wilcox
2020-12-09 23:26                       ` Paul E. McKenney
2020-12-09 23:29                       ` Linus Torvalds
2020-12-10  0:39                         ` Paul E. McKenney
2020-12-10  0:41                           ` Linus Torvalds
2020-12-10  0:57                             ` Paul E. McKenney
2020-11-20 23:14 ` Eric W. Biederman [this message]
2020-11-23 17:06   ` [PATCH v2 16/24] bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu Yonghong Song
2020-11-20 23:14 ` [PATCH v2 17/24] proc/fd: In fdinfo seq_show don't use get_files_struct Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 18/24] file: Merge __fd_install into fd_install Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 19/24] file: In f_dupfd read RLIMIT_NOFILE once Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 20/24] file: Merge __alloc_fd into alloc_fd Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 21/24] file: Rename __close_fd to close_fd and remove the files parameter Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 22/24] file: Replace ksys_close with close_fd Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 23/24] file: Rename __close_fd_get_file close_fd_get_file Eric W. Biederman
2020-11-20 23:14 ` [PATCH v2 24/24] file: Remove get_files_struct Eric W. Biederman
2020-11-21  0:05 ` [PATCH v2 00/24] exec: Move unshare_files and guarantee files_struct.count is correct Linus Torvalds
2020-11-28  5:12   ` Al Viro
2020-12-07 23:56     ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201120231441.29911-16-ebiederm@xmission.com \
    --to=ebiederm@xmission.com \
    --cc=andriin@fb.com \
    --cc=ast@kernel.org \
    --cc=berrange@redhat.com \
    --cc=bfields@fieldses.org \
    --cc=bpf@vger.kernel.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=chrisw@redhat.com \
    --cc=criu@openvz.org \
    --cc=daniel@iogearbox.net \
    --cc=gorcunov@gmail.com \
    --cc=jann@thejh.net \
    --cc=jlayton@redhat.com \
    --cc=john.fastabend@gmail.com \
    --cc=kafai@fb.com \
    --cc=keescook@chromium.org \
    --cc=kpsingh@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=oleg@redhat.com \
    --cc=songliubraving@fb.com \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@hammerspace.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    --cc=yhs@fb.com \
    --subject='Re: [PATCH v2 16/24] bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).