From: Kui-Feng Lee <kuifeng@fb.com>
To: <bpf@vger.kernel.org>, <ast@kernel.org>, <daniel@iogearbox.net>,
<andrii@kernel.org>, <kernel-team@fb.com>, <yhs@fb.com>
Cc: Kui-Feng Lee <kuifeng@fb.com>
Subject: [PATCH bpf-next v2 1/3] bpf: Parameterize task iterators.
Date: Mon, 1 Aug 2022 16:26:47 -0700 [thread overview]
Message-ID: <20220801232649.2306614-2-kuifeng@fb.com> (raw)
In-Reply-To: <20220801232649.2306614-1-kuifeng@fb.com>
Allow creating an iterator that loops through resources of one task/thread.
Previously, people could only create iterators that loop through all
files, vmas, and tasks in the system, even when they were interested
in only the resources of a specific task or process. By passing
additional parameters, people can now create an iterator that goes
through either all resources or only the resources of one task.
Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
---
include/linux/bpf.h | 4 ++
include/uapi/linux/bpf.h | 23 +++++++++
kernel/bpf/task_iter.c | 93 ++++++++++++++++++++++++++--------
tools/include/uapi/linux/bpf.h | 23 +++++++++
4 files changed, 121 insertions(+), 22 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 11950029284f..3c26dbfc9cef 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1718,6 +1718,10 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
struct bpf_iter_aux_info {
struct bpf_map *map;
+ struct {
+ u32 tid;
+ u8 type;
+ } task;
};
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ffcbf79a556b..ed5ba501609f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -87,10 +87,33 @@ struct bpf_cgroup_storage_key {
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
+enum bpf_task_iter_type {
+ BPF_TASK_ITER_ALL = 0,
+ BPF_TASK_ITER_TID,
+};
+
union bpf_iter_link_info {
struct {
__u32 map_fd;
} map;
+ /*
+ * Parameters of task iterators.
+ */
+ struct {
+ __u32 pid_fd;
+ /*
+ * The type of the iterator.
+ *
+ * It can be one of enum bpf_task_iter_type.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ * The iterator iterates over resources of every process.
+ *
+ * BPF_TASK_ITER_TID
+ * You should also set *pid_fd* to iterate over one task.
+ */
+ __u8 type; /* BPF_TASK_ITER_* */
+ } task;
};
/* BPF syscall commands, see bpf(2) man-page for more details. */
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8c921799def4..9942601e1dfb 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -12,6 +12,8 @@
struct bpf_iter_seq_task_common {
struct pid_namespace *ns;
+ u32 tid;
+ u8 type;
};
struct bpf_iter_seq_task_info {
@@ -22,18 +24,31 @@ struct bpf_iter_seq_task_info {
u32 tid;
};
-static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
u32 *tid,
bool skip_if_dup_files)
{
struct task_struct *task = NULL;
struct pid *pid;
+ if (common->type == BPF_TASK_ITER_TID) {
+ if (*tid && *tid != common->tid)
+ return NULL;
+ rcu_read_lock();
+ pid = find_pid_ns(common->tid, common->ns);
+ if (pid) {
+ task = get_pid_task(pid, PIDTYPE_PID);
+ *tid = common->tid;
+ }
+ rcu_read_unlock();
+ return task;
+ }
+
rcu_read_lock();
retry:
- pid = find_ge_pid(*tid, ns);
+ pid = find_ge_pid(*tid, common->ns);
if (pid) {
- *tid = pid_nr_ns(pid, ns);
+ *tid = pid_nr_ns(pid, common->ns);
task = get_pid_task(pid, PIDTYPE_PID);
if (!task) {
++*tid;
@@ -56,7 +71,8 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
struct bpf_iter_seq_task_info *info = seq->private;
struct task_struct *task;
- task = task_seq_get_next(info->common.ns, &info->tid, false);
+ task = task_seq_get_next(&info->common, &info->tid, false);
+
if (!task)
return NULL;
@@ -73,7 +89,8 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
++info->tid;
put_task_struct((struct task_struct *)v);
- task = task_seq_get_next(info->common.ns, &info->tid, false);
+
+ task = task_seq_get_next(&info->common, &info->tid, false);
if (!task)
return NULL;
@@ -117,6 +134,30 @@ static void task_seq_stop(struct seq_file *seq, void *v)
put_task_struct((struct task_struct *)v);
}
+static int bpf_iter_attach_task(struct bpf_prog *prog,
+ union bpf_iter_link_info *linfo,
+ struct bpf_iter_aux_info *aux)
+{
+ unsigned int flags;
+ struct task_struct *tsk;
+
+ if (linfo->task.type == BPF_TASK_ITER_ALL && linfo->task.pid_fd != 0)
+ return -EINVAL;
+
+ aux->task.type = linfo->task.type;
+
+ if (linfo->task.type == BPF_TASK_ITER_TID) {
+ tsk = pidfd_get_task(linfo->task.pid_fd, &flags);
+ if (IS_ERR(tsk))
+ return PTR_ERR(tsk);
+
+ aux->task.tid = tsk->pid;
+ put_task_struct(tsk);
+ }
+
+ return 0;
+}
+
static const struct seq_operations task_seq_ops = {
.start = task_seq_start,
.next = task_seq_next,
@@ -137,8 +178,7 @@ struct bpf_iter_seq_task_file_info {
static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
- struct pid_namespace *ns = info->common.ns;
- u32 curr_tid = info->tid;
+ u32 saved_tid = info->tid;
struct task_struct *curr_task;
unsigned int curr_fd = info->fd;
@@ -151,21 +191,18 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
curr_task = info->task;
curr_fd = info->fd;
} else {
- curr_task = task_seq_get_next(ns, &curr_tid, true);
+ curr_task = task_seq_get_next(&info->common, &info->tid, true);
if (!curr_task) {
info->task = NULL;
- info->tid = curr_tid;
return NULL;
}
- /* set info->task and info->tid */
+ /* set info->task */
info->task = curr_task;
- if (curr_tid == info->tid) {
+ if (saved_tid == info->tid)
curr_fd = info->fd;
- } else {
- info->tid = curr_tid;
+ else
curr_fd = 0;
- }
}
rcu_read_lock();
@@ -186,9 +223,15 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
/* the current task is done, go to the next task */
rcu_read_unlock();
put_task_struct(curr_task);
+
+ if (info->common.type == BPF_TASK_ITER_TID) {
+ info->task = NULL;
+ return NULL;
+ }
+
info->task = NULL;
info->fd = 0;
- curr_tid = ++(info->tid);
+ saved_tid = ++(info->tid);
goto again;
}
@@ -269,6 +312,8 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
struct bpf_iter_seq_task_common *common = priv_data;
common->ns = get_pid_ns(task_active_pid_ns(current));
+ common->type = aux->task.type;
+ common->tid = aux->task.tid;
return 0;
}
@@ -307,11 +352,10 @@ enum bpf_task_vma_iter_find_op {
static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{
- struct pid_namespace *ns = info->common.ns;
enum bpf_task_vma_iter_find_op op;
struct vm_area_struct *curr_vma;
struct task_struct *curr_task;
- u32 curr_tid = info->tid;
+ u32 saved_tid = info->tid;
/* If this function returns a non-NULL vma, it holds a reference to
* the task_struct, and holds read lock on vma->mm->mmap_lock.
@@ -371,14 +415,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
}
} else {
again:
- curr_task = task_seq_get_next(ns, &curr_tid, true);
+ curr_task = task_seq_get_next(&info->common, &info->tid, true);
if (!curr_task) {
- info->tid = curr_tid + 1;
+ info->tid++;
goto finish;
}
- if (curr_tid != info->tid) {
- info->tid = curr_tid;
+ if (saved_tid != info->tid) {
/* new task, process the first vma */
op = task_vma_iter_first_vma;
} else {
@@ -430,9 +473,12 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
return curr_vma;
next_task:
+ if (info->common.type == BPF_TASK_ITER_TID)
+ goto finish;
+
put_task_struct(curr_task);
info->task = NULL;
- curr_tid++;
+ info->tid++;
goto again;
finish:
@@ -533,6 +579,7 @@ static const struct bpf_iter_seq_info task_seq_info = {
static struct bpf_iter_reg task_reg_info = {
.target = "task",
+ .attach_target = bpf_iter_attach_task,
.feature = BPF_ITER_RESCHED,
.ctx_arg_info_size = 1,
.ctx_arg_info = {
@@ -551,6 +598,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = {
static struct bpf_iter_reg task_file_reg_info = {
.target = "task_file",
+ .attach_target = bpf_iter_attach_task,
.feature = BPF_ITER_RESCHED,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
@@ -571,6 +619,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = {
static struct bpf_iter_reg task_vma_reg_info = {
.target = "task_vma",
+ .attach_target = bpf_iter_attach_task,
.feature = BPF_ITER_RESCHED,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ffcbf79a556b..ed5ba501609f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -87,10 +87,33 @@ struct bpf_cgroup_storage_key {
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
+enum bpf_task_iter_type {
+ BPF_TASK_ITER_ALL = 0,
+ BPF_TASK_ITER_TID,
+};
+
union bpf_iter_link_info {
struct {
__u32 map_fd;
} map;
+ /*
+ * Parameters of task iterators.
+ */
+ struct {
+ __u32 pid_fd;
+ /*
+ * The type of the iterator.
+ *
+ * It can be one of enum bpf_task_iter_type.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ * The iterator iterates over resources of every process.
+ *
+ * BPF_TASK_ITER_TID
+ * You should also set *pid_fd* to iterate over one task.
+ */
+ __u8 type; /* BPF_TASK_ITER_* */
+ } task;
};
/* BPF syscall commands, see bpf(2) man-page for more details. */
--
2.30.2
next prev parent reply other threads:[~2022-08-01 23:27 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-01 23:26 [PATCH bpf-next v2 0/3] Parameterize task iterators Kui-Feng Lee
2022-08-01 23:26 ` Kui-Feng Lee [this message]
2022-08-02 1:49 ` [PATCH bpf-next v2 1/3] bpf: " Alexei Starovoitov
2022-08-02 16:47 ` Kui-Feng Lee
2022-08-02 21:19 ` Andrii Nakryiko
2022-08-02 3:30 ` Andrii Nakryiko
2022-08-02 16:42 ` Kui-Feng Lee
2022-08-02 21:17 ` Andrii Nakryiko
2022-08-04 23:05 ` Kui-Feng Lee
2022-08-01 23:26 ` [PATCH bpf-next v2 2/3] bpf: Handle bpf_link_info for the parameterized task BPF iterators Kui-Feng Lee
2022-08-01 23:26 ` [PATCH bpf-next v2 3/3] selftests/bpf: Test " Kui-Feng Lee
2022-08-01 23:35 ` [PATCH bpf-next v2 0/3] Parameterize task iterators Kui-Feng Lee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220801232649.2306614-2-kuifeng@fb.com \
--to=kuifeng@fb.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@fb.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).