bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next v4 0/3] Parameterize task iterators.
@ 2022-08-09 19:54 Kui-Feng Lee
  2022-08-09 19:54 ` [PATCH bpf-next v4 1/3] bpf: " Kui-Feng Lee
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2022-08-09 19:54 UTC (permalink / raw)
  To: bpf, ast, daniel, andrii, kernel-team, yhs; +Cc: Kui-Feng Lee

Allow creating an iterator that loops through resources of one task/thread.

People could only create iterators to loop through all resources of
files, vma, and tasks in the system, even though they were interested in only the
resources of a specific task or process.  Passing the additional
parameters, people can now create an iterator to go through all
resources or only the resources of a task.

Major Changes:

 - Add new parameters in bpf_iter_link_info to indicate to go through
   all tasks or to go through a specific task.

 - Change the implementations of BPF iterators of vma, files, and
   tasks to allow going through only the resources of a specific task.

 - Provide the arguments of parameterized task iterators in
   bpf_link_info.

Differences from v3:

 - Fix the test case bpf_iter/test

Differences from v2:

 - Supports tid, tgid, and pidfd.

 - Change 'type' from __u8 to enum bpf_task_iter_type.

v3: https://lore.kernel.org/bpf/20220809063501.667610-1-kuifeng@fb.com/
v2: https://lore.kernel.org/bpf/20220801232649.2306614-1-kuifeng@fb.com/
v1: https://lore.kernel.org/bpf/20220726051713.840431-1-kuifeng@fb.com/

Kui-Feng Lee (3):
  bpf: Parameterize task iterators.
  bpf: Handle bpf_link_info for the parameterized task BPF iterators.
  selftests/bpf: Test parameterized task BPF iterators.

 include/linux/bpf.h                           |   8 +
 include/uapi/linux/bpf.h                      |  43 ++++
 kernel/bpf/task_iter.c                        | 153 +++++++++++--
 tools/include/uapi/linux/bpf.h                |  43 ++++
 .../selftests/bpf/prog_tests/bpf_iter.c       | 208 ++++++++++++++++--
 .../selftests/bpf/prog_tests/btf_dump.c       |   2 +-
 .../selftests/bpf/progs/bpf_iter_task.c       |   9 +
 .../selftests/bpf/progs/bpf_iter_task_file.c  |   7 +
 .../selftests/bpf/progs/bpf_iter_task_vma.c   |   6 +-
 9 files changed, 430 insertions(+), 49 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH bpf-next v4 1/3] bpf: Parameterize task iterators.
  2022-08-09 19:54 [PATCH bpf-next v4 0/3] Parameterize task iterators Kui-Feng Lee
@ 2022-08-09 19:54 ` Kui-Feng Lee
  2022-08-09 22:12   ` Alexei Starovoitov
  2022-08-09 19:54 ` [PATCH bpf-next v4 2/3] bpf: Handle bpf_link_info for the parameterized task BPF iterators Kui-Feng Lee
  2022-08-09 19:54 ` [PATCH bpf-next v4 3/3] selftests/bpf: Test " Kui-Feng Lee
  2 siblings, 1 reply; 8+ messages in thread
From: Kui-Feng Lee @ 2022-08-09 19:54 UTC (permalink / raw)
  To: bpf, ast, daniel, andrii, kernel-team, yhs; +Cc: Kui-Feng Lee

Allow creating an iterator that loops through resources of one task/thread.

People could only create iterators to loop through all resources of
files, vma, and tasks in the system, even though they were interested
in only the resources of a specific task or process.  Passing the
additional parameters, people can now create an iterator to go
through all resources or only the resources of a task.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
---
 include/linux/bpf.h            |   8 ++
 include/uapi/linux/bpf.h       |  36 +++++++++
 kernel/bpf/task_iter.c         | 134 +++++++++++++++++++++++++++------
 tools/include/uapi/linux/bpf.h |  36 +++++++++
 4 files changed, 190 insertions(+), 24 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 11950029284f..bef81324e5f1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1718,6 +1718,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
 
 struct bpf_iter_aux_info {
 	struct bpf_map *map;
+	struct {
+		enum bpf_iter_task_type	type;
+		union {
+			u32 tid;
+			u32 tgid;
+			u32 pid_fd;
+		};
+	} task;
 };
 
 typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ffcbf79a556b..3d0b9e34089f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -87,10 +87,46 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type (enum bpf_attach_type) */
 };
 
+/*
+ * The task type of iterators.
+ *
+ * For BPF task iterators, they can be parameterized with various
+ * parameters to visit only some of tasks.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ *	Iterate over resources of every task.
+ *
+ * BPF_TASK_ITER_TID
+ *	Iterate over resources of a task/tid.
+ *
+ * BPF_TASK_ITER_TGID
+ *	Iterate over resources of every task of a process / task group.
+ *
+ * BPF_TASK_ITER_PIDFD
+ *	Iterate over resources of every task of a process / task group specified by a pidfd.
+ */
+enum bpf_iter_task_type {
+	BPF_TASK_ITER_ALL = 0,
+	BPF_TASK_ITER_TID,
+	BPF_TASK_ITER_TGID,
+	BPF_TASK_ITER_PIDFD,
+};
+
 union bpf_iter_link_info {
 	struct {
 		__u32	map_fd;
 	} map;
+	/*
+	 * Parameters of task iterators.
+	 */
+	struct {
+		enum bpf_iter_task_type	type;
+		union {
+			__u32 tid;
+			__u32 tgid;
+			__u32 pid_fd;
+		};
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8c921799def4..047d94493117 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -12,6 +12,12 @@
 
 struct bpf_iter_seq_task_common {
 	struct pid_namespace *ns;
+	enum bpf_iter_task_type	type;
+	union {
+		u32 tid;
+		u32 tgid;
+		u32 pid_fd;
+	};
 };
 
 struct bpf_iter_seq_task_info {
@@ -22,24 +28,41 @@ struct bpf_iter_seq_task_info {
 	u32 tid;
 };
 
-static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
 					     u32 *tid,
 					     bool skip_if_dup_files)
 {
 	struct task_struct *task = NULL;
 	struct pid *pid;
 
+	if (common->type == BPF_TASK_ITER_TID) {
+		if (*tid && *tid != common->tid)
+			return NULL;
+		rcu_read_lock();
+		pid = find_pid_ns(common->tid, common->ns);
+		if (pid) {
+			task = get_pid_task(pid, PIDTYPE_PID);
+			*tid = common->tid;
+		}
+		rcu_read_unlock();
+		return task;
+	}
+
 	rcu_read_lock();
 retry:
-	pid = find_ge_pid(*tid, ns);
+	pid = find_ge_pid(*tid, common->ns);
 	if (pid) {
-		*tid = pid_nr_ns(pid, ns);
+		*tid = pid_nr_ns(pid, common->ns);
 		task = get_pid_task(pid, PIDTYPE_PID);
+
+
 		if (!task) {
 			++*tid;
 			goto retry;
-		} else if (skip_if_dup_files && !thread_group_leader(task) &&
-			   task->files == task->group_leader->files) {
+		} else if ((skip_if_dup_files && !thread_group_leader(task) &&
+			    task->files == task->group_leader->files) ||
+			   (common->type == BPF_TASK_ITER_TGID &&
+			    __task_pid_nr_ns(task, PIDTYPE_TGID, common->ns) != common->tgid)) {
 			put_task_struct(task);
 			task = NULL;
 			++*tid;
@@ -56,7 +79,8 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
 	struct bpf_iter_seq_task_info *info = seq->private;
 	struct task_struct *task;
 
-	task = task_seq_get_next(info->common.ns, &info->tid, false);
+	task = task_seq_get_next(&info->common, &info->tid, false);
+
 	if (!task)
 		return NULL;
 
@@ -73,7 +97,8 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++*pos;
 	++info->tid;
 	put_task_struct((struct task_struct *)v);
-	task = task_seq_get_next(info->common.ns, &info->tid, false);
+
+	task = task_seq_get_next(&info->common, &info->tid, false);
 	if (!task)
 		return NULL;
 
@@ -117,6 +142,50 @@ static void task_seq_stop(struct seq_file *seq, void *v)
 		put_task_struct((struct task_struct *)v);
 }
 
+static int bpf_iter_attach_task(struct bpf_prog *prog,
+				union bpf_iter_link_info *linfo,
+				struct bpf_iter_aux_info *aux)
+{
+	unsigned int flags;
+	struct pid_namespace *ns;
+	struct pid *pid;
+	pid_t tgid;
+
+	if (linfo->task.type == BPF_TASK_ITER_ALL && linfo->task.pid_fd != 0)
+		return -EINVAL;
+
+	aux->task.type = linfo->task.type;
+
+	switch (linfo->task.type) {
+	case BPF_TASK_ITER_TID:
+		aux->task.tid = linfo->task.tid;
+		break;
+	case BPF_TASK_ITER_TGID:
+		aux->task.tgid = linfo->task.tgid;
+		break;
+	case BPF_TASK_ITER_PIDFD:
+		pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
+		if (IS_ERR(pid))
+			return PTR_ERR(pid);
+
+		ns = task_active_pid_ns(current);
+		if (IS_ERR(ns))
+			return PTR_ERR(ns);
+
+		tgid = pid_nr_ns(pid, ns);
+		if (tgid <= 0)
+			return -EINVAL;
+
+		aux->task.tgid = tgid;
+		aux->task.type = BPF_TASK_ITER_TGID;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 static const struct seq_operations task_seq_ops = {
 	.start	= task_seq_start,
 	.next	= task_seq_next,
@@ -137,8 +206,7 @@ struct bpf_iter_seq_task_file_info {
 static struct file *
 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
 {
-	struct pid_namespace *ns = info->common.ns;
-	u32 curr_tid = info->tid;
+	u32 saved_tid = info->tid;
 	struct task_struct *curr_task;
 	unsigned int curr_fd = info->fd;
 
@@ -151,21 +219,18 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
 		curr_task = info->task;
 		curr_fd = info->fd;
 	} else {
-                curr_task = task_seq_get_next(ns, &curr_tid, true);
+		curr_task = task_seq_get_next(&info->common, &info->tid, true);
                 if (!curr_task) {
                         info->task = NULL;
-                        info->tid = curr_tid;
                         return NULL;
                 }
 
-                /* set info->task and info->tid */
+		/* set info->task */
 		info->task = curr_task;
-		if (curr_tid == info->tid) {
+		if (saved_tid == info->tid)
 			curr_fd = info->fd;
-		} else {
-			info->tid = curr_tid;
+		else
 			curr_fd = 0;
-		}
 	}
 
 	rcu_read_lock();
@@ -186,9 +251,15 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
 	/* the current task is done, go to the next task */
 	rcu_read_unlock();
 	put_task_struct(curr_task);
+
+	if (info->common.type == BPF_TASK_ITER_TID) {
+		info->task = NULL;
+		return NULL;
+	}
+
 	info->task = NULL;
 	info->fd = 0;
-	curr_tid = ++(info->tid);
+	saved_tid = ++(info->tid);
 	goto again;
 }
 
@@ -269,6 +340,17 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
 	struct bpf_iter_seq_task_common *common = priv_data;
 
 	common->ns = get_pid_ns(task_active_pid_ns(current));
+	common->type = aux->task.type;
+	switch (common->type) {
+	case BPF_TASK_ITER_TID:
+		common->tid = aux->task.tid;
+		break;
+	case BPF_TASK_ITER_TGID:
+		common->tgid = aux->task.tgid;
+		break;
+	default:
+		break;
+	}
 	return 0;
 }
 
@@ -307,11 +389,10 @@ enum bpf_task_vma_iter_find_op {
 static struct vm_area_struct *
 task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 {
-	struct pid_namespace *ns = info->common.ns;
 	enum bpf_task_vma_iter_find_op op;
 	struct vm_area_struct *curr_vma;
 	struct task_struct *curr_task;
-	u32 curr_tid = info->tid;
+	u32 saved_tid = info->tid;
 
 	/* If this function returns a non-NULL vma, it holds a reference to
 	 * the task_struct, and holds read lock on vma->mm->mmap_lock.
@@ -371,14 +452,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 		}
 	} else {
 again:
-		curr_task = task_seq_get_next(ns, &curr_tid, true);
+		curr_task = task_seq_get_next(&info->common, &info->tid, true);
 		if (!curr_task) {
-			info->tid = curr_tid + 1;
+			info->tid++;
 			goto finish;
 		}
 
-		if (curr_tid != info->tid) {
-			info->tid = curr_tid;
+		if (saved_tid != info->tid) {
 			/* new task, process the first vma */
 			op = task_vma_iter_first_vma;
 		} else {
@@ -430,9 +510,12 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 	return curr_vma;
 
 next_task:
+	if (info->common.type == BPF_TASK_ITER_TID)
+		goto finish;
+
 	put_task_struct(curr_task);
 	info->task = NULL;
-	curr_tid++;
+	info->tid++;
 	goto again;
 
 finish:
@@ -533,6 +616,7 @@ static const struct bpf_iter_seq_info task_seq_info = {
 
 static struct bpf_iter_reg task_reg_info = {
 	.target			= "task",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 1,
 	.ctx_arg_info		= {
@@ -551,6 +635,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = {
 
 static struct bpf_iter_reg task_file_reg_info = {
 	.target			= "task_file",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
@@ -571,6 +656,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = {
 
 static struct bpf_iter_reg task_vma_reg_info = {
 	.target			= "task_vma",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ffcbf79a556b..3d0b9e34089f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -87,10 +87,46 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type (enum bpf_attach_type) */
 };
 
+/*
+ * The task type of iterators.
+ *
+ * For BPF task iterators, they can be parameterized with various
+ * parameters to visit only some of tasks.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ *	Iterate over resources of every task.
+ *
+ * BPF_TASK_ITER_TID
+ *	Iterate over resources of a task/tid.
+ *
+ * BPF_TASK_ITER_TGID
+ *	Iterate over resources of every task of a process / task group.
+ *
+ * BPF_TASK_ITER_PIDFD
+ *	Iterate over resources of every task of a process / task group specified by a pidfd.
+ */
+enum bpf_iter_task_type {
+	BPF_TASK_ITER_ALL = 0,
+	BPF_TASK_ITER_TID,
+	BPF_TASK_ITER_TGID,
+	BPF_TASK_ITER_PIDFD,
+};
+
 union bpf_iter_link_info {
 	struct {
 		__u32	map_fd;
 	} map;
+	/*
+	 * Parameters of task iterators.
+	 */
+	struct {
+		enum bpf_iter_task_type	type;
+		union {
+			__u32 tid;
+			__u32 tgid;
+			__u32 pid_fd;
+		};
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH bpf-next v4 2/3] bpf: Handle bpf_link_info for the parameterized task BPF iterators.
  2022-08-09 19:54 [PATCH bpf-next v4 0/3] Parameterize task iterators Kui-Feng Lee
  2022-08-09 19:54 ` [PATCH bpf-next v4 1/3] bpf: " Kui-Feng Lee
@ 2022-08-09 19:54 ` Kui-Feng Lee
  2022-08-09 19:54 ` [PATCH bpf-next v4 3/3] selftests/bpf: Test " Kui-Feng Lee
  2 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2022-08-09 19:54 UTC (permalink / raw)
  To: bpf, ast, daniel, andrii, kernel-team, yhs; +Cc: Kui-Feng Lee

Add new fields to bpf_link_info that users can query it through
bpf_obj_get_info_by_fd().

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
---
 include/uapi/linux/bpf.h       |  7 +++++++
 kernel/bpf/task_iter.c         | 19 +++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  7 +++++++
 3 files changed, 33 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3d0b9e34089f..17b945e87973 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6171,6 +6171,13 @@ struct bpf_link_info {
 				struct {
 					__u32 map_id;
 				} map;
+				struct {
+					enum bpf_iter_task_type type;
+					union {
+						__u32 tid;
+						__u32 tgid;
+					};
+				} task;
 			};
 		} iter;
 		struct  {
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 047d94493117..01213329398f 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -614,6 +614,22 @@ static const struct bpf_iter_seq_info task_seq_info = {
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
 };
 
+static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info)
+{
+	switch (aux->task.type) {
+	case BPF_TASK_ITER_TID:
+		info->iter.task.tid = aux->task.tid;
+		break;
+	case BPF_TASK_ITER_TGID:
+		info->iter.task.tgid = aux->task.tgid;
+		break;
+	default:
+		break;
+	}
+	info->iter.task.type = aux->task.type;
+	return 0;
+}
+
 static struct bpf_iter_reg task_reg_info = {
 	.target			= "task",
 	.attach_target		= bpf_iter_attach_task,
@@ -624,6 +640,7 @@ static struct bpf_iter_reg task_reg_info = {
 		  PTR_TO_BTF_ID_OR_NULL },
 	},
 	.seq_info		= &task_seq_info,
+	.fill_link_info		= bpf_iter_fill_link_info,
 };
 
 static const struct bpf_iter_seq_info task_file_seq_info = {
@@ -645,6 +662,7 @@ static struct bpf_iter_reg task_file_reg_info = {
 		  PTR_TO_BTF_ID_OR_NULL },
 	},
 	.seq_info		= &task_file_seq_info,
+	.fill_link_info		= bpf_iter_fill_link_info,
 };
 
 static const struct bpf_iter_seq_info task_vma_seq_info = {
@@ -666,6 +684,7 @@ static struct bpf_iter_reg task_vma_reg_info = {
 		  PTR_TO_BTF_ID_OR_NULL },
 	},
 	.seq_info		= &task_vma_seq_info,
+	.fill_link_info		= bpf_iter_fill_link_info,
 };
 
 BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3d0b9e34089f..17b945e87973 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6171,6 +6171,13 @@ struct bpf_link_info {
 				struct {
 					__u32 map_id;
 				} map;
+				struct {
+					enum bpf_iter_task_type type;
+					union {
+						__u32 tid;
+						__u32 tgid;
+					};
+				} task;
 			};
 		} iter;
 		struct  {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH bpf-next v4 3/3] selftests/bpf: Test parameterized task BPF iterators.
  2022-08-09 19:54 [PATCH bpf-next v4 0/3] Parameterize task iterators Kui-Feng Lee
  2022-08-09 19:54 ` [PATCH bpf-next v4 1/3] bpf: " Kui-Feng Lee
  2022-08-09 19:54 ` [PATCH bpf-next v4 2/3] bpf: Handle bpf_link_info for the parameterized task BPF iterators Kui-Feng Lee
@ 2022-08-09 19:54 ` Kui-Feng Lee
  2 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2022-08-09 19:54 UTC (permalink / raw)
  To: bpf, ast, daniel, andrii, kernel-team, yhs; +Cc: Kui-Feng Lee

Test iterators of vma, files, and tasks of tasks.

Ensure the API works appropriately to visit all tasks,
tasks in a process, or a particular task.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 208 ++++++++++++++++--
 .../selftests/bpf/prog_tests/btf_dump.c       |   2 +-
 .../selftests/bpf/progs/bpf_iter_task.c       |   9 +
 .../selftests/bpf/progs/bpf_iter_task_file.c  |   7 +
 .../selftests/bpf/progs/bpf_iter_task_vma.c   |   6 +-
 5 files changed, 207 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index a33874b081b6..407429b9eec5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -1,6 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #include <test_progs.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <signal.h>
 #include "bpf_iter_ipv6_route.skel.h"
 #include "bpf_iter_netlink.skel.h"
 #include "bpf_iter_bpf_map.skel.h"
@@ -42,13 +45,13 @@ static void test_btf_id_or_null(void)
 	}
 }
 
-static void do_dummy_read(struct bpf_program *prog)
+static void do_dummy_read(struct bpf_program *prog, struct bpf_iter_attach_opts *opts)
 {
 	struct bpf_link *link;
 	char buf[16] = {};
 	int iter_fd, len;
 
-	link = bpf_program__attach_iter(prog, NULL);
+	link = bpf_program__attach_iter(prog, opts);
 	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		return;
 
@@ -91,7 +94,7 @@ static void test_ipv6_route(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_ipv6_route__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_ipv6_route);
+	do_dummy_read(skel->progs.dump_ipv6_route, NULL);
 
 	bpf_iter_ipv6_route__destroy(skel);
 }
@@ -104,7 +107,7 @@ static void test_netlink(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_netlink__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_netlink);
+	do_dummy_read(skel->progs.dump_netlink, NULL);
 
 	bpf_iter_netlink__destroy(skel);
 }
@@ -117,24 +120,142 @@ static void test_bpf_map(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_map__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_bpf_map);
+	do_dummy_read(skel->progs.dump_bpf_map, NULL);
 
 	bpf_iter_bpf_map__destroy(skel);
 }
 
-static void test_task(void)
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(SYS_pidfd_open, pid, flags);
+}
+
+static void check_bpf_link_info(const struct bpf_program *prog)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	struct bpf_link_info info = {};
+	__u32 info_len;
+	struct bpf_link *link;
+	int err;
+
+	linfo.task.tid = getpid();
+	linfo.task.type = BPF_TASK_ITER_TID;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	link = bpf_program__attach_iter(prog, &opts);
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
+		return;
+
+	info_len = sizeof(info);
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
+	if (ASSERT_OK(err, "bpf_obj_get_info_by_fd")) {
+		ASSERT_EQ(info.iter.task.type, BPF_TASK_ITER_TID, "check_task_type");
+		ASSERT_EQ(info.iter.task.tid, getpid(), "check_task_tid");
+	}
+
+	bpf_link__destroy(link);
+}
+
+static pthread_mutex_t do_nothing_mutex;
+
+static void *do_nothing_wait(void *arg)
+{
+	pthread_mutex_lock(&do_nothing_mutex);
+	pthread_mutex_unlock(&do_nothing_mutex);
+
+	pthread_exit(arg);
+}
+
+static void test_task_(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known)
 {
 	struct bpf_iter_task *skel;
+	pthread_t thread_id;
+	void *ret;
 
 	skel = bpf_iter_task__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_task);
+	if (!ASSERT_OK(pthread_mutex_init(&do_nothing_mutex, NULL), "pthread_mutex_init"))
+		goto done;
+	if (!ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock"))
+		goto done;
+
+	if (!ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL),
+		  "pthread_create"))
+		goto done;
+
+
+	skel->bss->tid = getpid();
+
+	do_dummy_read(skel->progs.dump_task, opts);
 
+	if (!ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock"))
+		goto done;
+
+	if (num_unknown >= 0)
+		ASSERT_EQ(skel->bss->num_unknown_tid, num_unknown, "check_num_unknown_tid");
+	if (num_known >= 0)
+		ASSERT_EQ(skel->bss->num_known_tid, num_known, "check_num_known_tid");
+
+	ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
+		     "pthread_join");
+
+done:
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+
+	linfo.task.tid = getpid();
+	linfo.task.type = BPF_TASK_ITER_TID;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	test_task_(&opts, 0, 1);
+
+	test_task_(NULL, -1, 1);
+}
+
+static void test_task_tgid(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+
+	linfo.task.tgid = getpid();
+	linfo.task.type = BPF_TASK_ITER_TGID;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	test_task_(&opts, 1, 1);
+}
+
+static void test_task_pidfd(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	int pidfd;
+
+	pidfd = pidfd_open(getpid(), 0);
+	if (!ASSERT_GE(pidfd, 0, "pidfd_open"))
+		return;
+
+
+	linfo.task.pid_fd = pidfd;
+	linfo.task.type = BPF_TASK_ITER_PIDFD;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	test_task_(&opts, 1, 1);
+
+	close(pidfd);
+}
+
 static void test_task_sleepable(void)
 {
 	struct bpf_iter_task *skel;
@@ -143,7 +264,7 @@ static void test_task_sleepable(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_task_sleepable);
+	do_dummy_read(skel->progs.dump_task_sleepable, NULL);
 
 	ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
 		  "num_expected_failure_copy_from_user_task");
@@ -161,8 +282,8 @@ static void test_task_stack(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_stack__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_task_stack);
-	do_dummy_read(skel->progs.get_task_user_stacks);
+	do_dummy_read(skel->progs.dump_task_stack, NULL);
+	do_dummy_read(skel->progs.get_task_user_stacks, NULL);
 
 	bpf_iter_task_stack__destroy(skel);
 }
@@ -174,7 +295,9 @@ static void *do_nothing(void *arg)
 
 static void test_task_file(void)
 {
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_task_file *skel;
+	union bpf_iter_link_info linfo;
 	pthread_t thread_id;
 	void *ret;
 
@@ -188,15 +311,31 @@ static void test_task_file(void)
 		  "pthread_create"))
 		goto done;
 
-	do_dummy_read(skel->progs.dump_task_file);
+	linfo.task.tid = getpid();
+	linfo.task.type = BPF_TASK_ITER_TID;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	do_dummy_read(skel->progs.dump_task_file, &opts);
 
 	if (!ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
 		  "pthread_join"))
 		goto done;
 
 	ASSERT_EQ(skel->bss->count, 0, "check_count");
+	ASSERT_EQ(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count");
 
-done:
+	skel->bss->count = 0;
+	skel->bss->unique_tgid_count = 0;
+
+	do_dummy_read(skel->progs.dump_task_file, NULL);
+
+	ASSERT_GE(skel->bss->count, 0, "check_count");
+	ASSERT_GE(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count");
+
+	check_bpf_link_info(skel->progs.dump_task_file);
+
+ done:
 	bpf_iter_task_file__destroy(skel);
 }
 
@@ -274,7 +413,7 @@ static void test_tcp4(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp4__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_tcp4);
+	do_dummy_read(skel->progs.dump_tcp4, NULL);
 
 	bpf_iter_tcp4__destroy(skel);
 }
@@ -287,7 +426,7 @@ static void test_tcp6(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp6__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_tcp6);
+	do_dummy_read(skel->progs.dump_tcp6, NULL);
 
 	bpf_iter_tcp6__destroy(skel);
 }
@@ -300,7 +439,7 @@ static void test_udp4(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_udp4__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_udp4);
+	do_dummy_read(skel->progs.dump_udp4, NULL);
 
 	bpf_iter_udp4__destroy(skel);
 }
@@ -313,7 +452,7 @@ static void test_udp6(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_udp6__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_udp6);
+	do_dummy_read(skel->progs.dump_udp6, NULL);
 
 	bpf_iter_udp6__destroy(skel);
 }
@@ -326,7 +465,7 @@ static void test_unix(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_unix);
+	do_dummy_read(skel->progs.dump_unix, NULL);
 
 	bpf_iter_unix__destroy(skel);
 }
@@ -988,7 +1127,7 @@ static void test_bpf_sk_storage_get(void)
 	if (!ASSERT_OK(err, "bpf_map_update_elem"))
 		goto close_socket;
 
-	do_dummy_read(skel->progs.fill_socket_owner);
+	do_dummy_read(skel->progs.fill_socket_owner, NULL);
 
 	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
 	if (CHECK(err || val != getpid(), "bpf_map_lookup_elem",
@@ -996,7 +1135,7 @@ static void test_bpf_sk_storage_get(void)
 	    getpid(), val, err))
 		goto close_socket;
 
-	do_dummy_read(skel->progs.negate_socket_local_storage);
+	do_dummy_read(skel->progs.negate_socket_local_storage, NULL);
 
 	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
 	CHECK(err || val != -getpid(), "bpf_map_lookup_elem",
@@ -1116,7 +1255,7 @@ static void test_link_iter(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_link__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_bpf_link);
+	do_dummy_read(skel->progs.dump_bpf_link, NULL);
 
 	bpf_iter_bpf_link__destroy(skel);
 }
@@ -1129,7 +1268,7 @@ static void test_ksym_iter(void)
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_ksym__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_ksym);
+	do_dummy_read(skel->progs.dump_ksym, NULL);
 
 	bpf_iter_ksym__destroy(skel);
 }
@@ -1154,7 +1293,7 @@ static void str_strip_first_line(char *str)
 	*dst = '\0';
 }
 
-static void test_task_vma(void)
+static void test_task_vma_(struct bpf_iter_attach_opts *opts)
 {
 	int err, iter_fd = -1, proc_maps_fd = -1;
 	struct bpf_iter_task_vma *skel;
@@ -1166,13 +1305,14 @@ static void test_task_vma(void)
 		return;
 
 	skel->bss->pid = getpid();
+	skel->bss->one_task = opts ? 1 : 0;
 
 	err = bpf_iter_task_vma__load(skel);
 	if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
 		goto out;
 
 	skel->links.proc_maps = bpf_program__attach_iter(
-		skel->progs.proc_maps, NULL);
+		skel->progs.proc_maps, opts);
 
 	if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
 		skel->links.proc_maps = NULL;
@@ -1211,12 +1351,30 @@ static void test_task_vma(void)
 	str_strip_first_line(proc_maps_output);
 
 	ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output");
+
+	check_bpf_link_info(skel->progs.proc_maps);
+
 out:
 	close(proc_maps_fd);
 	close(iter_fd);
 	bpf_iter_task_vma__destroy(skel);
 }
 
+static void test_task_vma(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.task.tid = getpid();
+	linfo.task.type = BPF_TASK_ITER_TID;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	test_task_vma_(&opts);
+	test_task_vma_(NULL);
+}
+
 void test_bpf_iter(void)
 {
 	if (test__start_subtest("btf_id_or_null"))
@@ -1229,6 +1387,10 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_tgid"))
+		test_task_tgid();
+	if (test__start_subtest("task_pidfd"))
+		test_task_pidfd();
 	if (test__start_subtest("task_sleepable"))
 		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 5fce7008d1ff..6a41e6a03154 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -764,7 +764,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
 
 	/* union with nested struct */
 	TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
-			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
+			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.task = (struct){.type = (enum bpf_iter_task_type)BPF_TASK_ITER_TID,},}",
 			   { .map = { .map_fd = 1 }});
 
 	/* struct skb with nested structs/unions; because type output is so
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index d22741272692..96131b9a1caa 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -6,6 +6,10 @@
 
 char _license[] SEC("license") = "GPL";
 
+uint32_t tid = 0;
+int num_unknown_tid = 0;
+int num_known_tid = 0;
+
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
@@ -18,6 +22,11 @@ int dump_task(struct bpf_iter__task *ctx)
 		return 0;
 	}
 
+	if (task->pid != tid)
+		num_unknown_tid++;
+	else
+		num_known_tid++;
+
 	if (ctx->meta->seq_num == 0)
 		BPF_SEQ_PRINTF(seq, "    tgid      gid\n");
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 6e7b400888fe..031455ed8748 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -7,6 +7,8 @@ char _license[] SEC("license") = "GPL";
 
 int count = 0;
 int tgid = 0;
+int last_tgid = -1;
+int unique_tgid_count = 0;
 
 SEC("iter/task_file")
 int dump_task_file(struct bpf_iter__task_file *ctx)
@@ -27,6 +29,11 @@ int dump_task_file(struct bpf_iter__task_file *ctx)
 	if (tgid == task->tgid && task->tgid != task->pid)
 		count++;
 
+	if (last_tgid != task->tgid) {
+		last_tgid = task->tgid;
+		unique_tgid_count++;
+	}
+
 	BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
 		       (long)file->f_op);
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
index 4ea6a37d1345..44f4a31c2ddd 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
@@ -20,6 +20,7 @@ char _license[] SEC("license") = "GPL";
 #define D_PATH_BUF_SIZE 1024
 char d_path_buf[D_PATH_BUF_SIZE] = {};
 __u32 pid = 0;
+__u32 one_task = 0;
 
 SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
 {
@@ -33,8 +34,11 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
 		return 0;
 
 	file = vma->vm_file;
-	if (task->tgid != pid)
+	if (task->tgid != pid) {
+		if (one_task)
+			BPF_SEQ_PRINTF(seq, "unexpected task (%d != %d)", task->tgid, pid);
 		return 0;
+	}
 	perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-';
 	perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-';
 	perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-';
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v4 1/3] bpf: Parameterize task iterators.
  2022-08-09 19:54 ` [PATCH bpf-next v4 1/3] bpf: " Kui-Feng Lee
@ 2022-08-09 22:12   ` Alexei Starovoitov
  2022-08-09 22:35     ` Kui-Feng Lee
  0 siblings, 1 reply; 8+ messages in thread
From: Alexei Starovoitov @ 2022-08-09 22:12 UTC (permalink / raw)
  To: Kui-Feng Lee
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Kernel Team, Yonghong Song

On Tue, Aug 9, 2022 at 12:54 PM Kui-Feng Lee <kuifeng@fb.com> wrote:
>
> Allow creating an iterator that loops through resources of one task/thread.
>
> People could only create iterators to loop through all resources of
> files, vma, and tasks in the system, even though they were interested
> in only the resources of a specific task or process.  Passing the
> additional parameters, people can now create an iterator to go
> through all resources or only the resources of a task.
>
> Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
> ---
>  include/linux/bpf.h            |   8 ++
>  include/uapi/linux/bpf.h       |  36 +++++++++
>  kernel/bpf/task_iter.c         | 134 +++++++++++++++++++++++++++------
>  tools/include/uapi/linux/bpf.h |  36 +++++++++
>  4 files changed, 190 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 11950029284f..bef81324e5f1 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1718,6 +1718,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
>
>  struct bpf_iter_aux_info {
>         struct bpf_map *map;
> +       struct {
> +               enum bpf_iter_task_type type;
> +               union {
> +                       u32 tid;
> +                       u32 tgid;
> +                       u32 pid_fd;
> +               };
> +       } task;
>  };
>
>  typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index ffcbf79a556b..3d0b9e34089f 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -87,10 +87,46 @@ struct bpf_cgroup_storage_key {
>         __u32   attach_type;            /* program attach type (enum bpf_attach_type) */
>  };
>
> +/*
> + * The task type of iterators.
> + *
> + * For BPF task iterators, they can be parameterized with various
> + * parameters to visit only some of tasks.
> + *
> + * BPF_TASK_ITER_ALL (default)
> + *     Iterate over resources of every task.
> + *
> + * BPF_TASK_ITER_TID
> + *     Iterate over resources of a task/tid.
> + *
> + * BPF_TASK_ITER_TGID
> + *     Iterate over resources of every task of a process / task group.
> + *
> + * BPF_TASK_ITER_PIDFD
> + *     Iterate over resources of every task of a process / task group specified by a pidfd.
> + */
> +enum bpf_iter_task_type {
> +       BPF_TASK_ITER_ALL = 0,
> +       BPF_TASK_ITER_TID,
> +       BPF_TASK_ITER_TGID,
> +       BPF_TASK_ITER_PIDFD,
> +};
> +
>  union bpf_iter_link_info {
>         struct {
>                 __u32   map_fd;
>         } map;
> +       /*
> +        * Parameters of task iterators.
> +        */
> +       struct {
> +               enum bpf_iter_task_type type;
> +               union {
> +                       __u32 tid;
> +                       __u32 tgid;
> +                       __u32 pid_fd;
> +               };

Sorry I'm late to this discussion, but
with enum and with union we kinda tell
the kernel the same information twice.
Here is how the selftest looks:
+       linfo.task.tid = getpid();
+       linfo.task.type = BPF_TASK_ITER_TID;

first line -> use tid.
second line -> yeah. I really meant the tid.

Instead of union and type can we do:
> +                       __u32 tid;
> +                       __u32 tgid;
> +                       __u32 pid_fd;

as 3 separate fields?
The kernel would have to check that only one
of them is set.

I could have missed an earlier discussion on this subj.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v4 1/3] bpf: Parameterize task iterators.
  2022-08-09 22:12   ` Alexei Starovoitov
@ 2022-08-09 22:35     ` Kui-Feng Lee
  2022-08-10  1:08       ` Alexei Starovoitov
  0 siblings, 1 reply; 8+ messages in thread
From: Kui-Feng Lee @ 2022-08-09 22:35 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: daniel, Kernel Team, Yonghong Song, ast, andrii, bpf

On Tue, 2022-08-09 at 15:12 -0700, Alexei Starovoitov wrote:
> On Tue, Aug 9, 2022 at 12:54 PM Kui-Feng Lee <kuifeng@fb.com> wrote:
> > 
> > Allow creating an iterator that loops through resources of one
> > task/thread.
> > 
> > People could only create iterators to loop through all resources of
> > files, vma, and tasks in the system, even though they were
> > interested
> > in only the resources of a specific task or process.  Passing the
> > additional parameters, people can now create an iterator to go
> > through all resources or only the resources of a task.
> > 
> > Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
> > ---
> >  include/linux/bpf.h            |   8 ++
> >  include/uapi/linux/bpf.h       |  36 +++++++++
> >  kernel/bpf/task_iter.c         | 134 +++++++++++++++++++++++++++--
> > ----
> >  tools/include/uapi/linux/bpf.h |  36 +++++++++
> >  4 files changed, 190 insertions(+), 24 deletions(-)
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 11950029284f..bef81324e5f1 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -1718,6 +1718,14 @@ int bpf_obj_get_user(const char __user
> > *pathname, int flags);
> > 
> >  struct bpf_iter_aux_info {
> >         struct bpf_map *map;
> > +       struct {
> > +               enum bpf_iter_task_type type;
> > +               union {
> > +                       u32 tid;
> > +                       u32 tgid;
> > +                       u32 pid_fd;
> > +               };
> > +       } task;
> >  };
> > 
> >  typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index ffcbf79a556b..3d0b9e34089f 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -87,10 +87,46 @@ struct bpf_cgroup_storage_key {
> >         __u32   attach_type;            /* program attach type
> > (enum bpf_attach_type) */
> >  };
> > 
> > +/*
> > + * The task type of iterators.
> > + *
> > + * For BPF task iterators, they can be parameterized with various
> > + * parameters to visit only some of tasks.
> > + *
> > + * BPF_TASK_ITER_ALL (default)
> > + *     Iterate over resources of every task.
> > + *
> > + * BPF_TASK_ITER_TID
> > + *     Iterate over resources of a task/tid.
> > + *
> > + * BPF_TASK_ITER_TGID
> > + *     Iterate over resources of every task of a process / task
> > group.
> > + *
> > + * BPF_TASK_ITER_PIDFD
> > + *     Iterate over resources of every task of a process /task
> > group specified by a pidfd.
> > + */
> > +enum bpf_iter_task_type {
> > +       BPF_TASK_ITER_ALL = 0,
> > +       BPF_TASK_ITER_TID,
> > +       BPF_TASK_ITER_TGID,
> > +       BPF_TASK_ITER_PIDFD,
> > +};
> > +
> >  union bpf_iter_link_info {
> >         struct {
> >                 __u32   map_fd;
> >         } map;
> > +       /*
> > +        * Parameters of task iterators.
> > +        */
> > +       struct {
> > +               enum bpf_iter_task_type type;
> > +               union {
> > +                       __u32 tid;
> > +                       __u32 tgid;
> > +                       __u32 pid_fd;
> > +               };
> 
> Sorry I'm late to this discussion, but
> with enum and with union we kinda tell
> the kernel the same information twice.
> Here is how the selftest looks:
> +       linfo.task.tid = getpid();
> +       linfo.task.type = BPF_TASK_ITER_TID;
> 
> first line -> use tid.
> second line -> yeah. I really meant the tid.
> 
> Instead of union and type can we do:
> > +                       __u32 tid;
> > +                       __u32 tgid;
> > +                       __u32 pid_fd;
> 
> as 3 separate fields?
> The kernel would have to check that only one
> of them is set.
> 
> I could have missed an earlier discussion on this subj.

We may have other parameter types later, for example, cgroups.
Unfortunately, we don't have tagged enums or tagged unions, like what
Rust or Haskell has, in C.  A separate 'type' field would be easier
and clearer for distinguishing them.  With 3 separate fields, people may
wonder whether they can set them all, or just one of them, in my opinion.
With a union, people should know that only one of them should be set.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v4 1/3] bpf: Parameterize task iterators.
  2022-08-09 22:35     ` Kui-Feng Lee
@ 2022-08-10  1:08       ` Alexei Starovoitov
  2022-08-10 17:43         ` Kui-Feng Lee
  0 siblings, 1 reply; 8+ messages in thread
From: Alexei Starovoitov @ 2022-08-10  1:08 UTC (permalink / raw)
  To: Kui-Feng Lee; +Cc: daniel, Kernel Team, Yonghong Song, ast, andrii, bpf

On Tue, Aug 9, 2022 at 3:35 PM Kui-Feng Lee <kuifeng@fb.com> wrote:
>
> On Tue, 2022-08-09 at 15:12 -0700, Alexei Starovoitov wrote:
> > On Tue, Aug 9, 2022 at 12:54 PM Kui-Feng Lee <kuifeng@fb.com> wrote:
> > >
> > > Allow creating an iterator that loops through resources of one
> > > task/thread.
> > >
> > > People could only create iterators to loop through all resources of
> > > files, vma, and tasks in the system, even though they were
> > > interested
> > > in only the resources of a specific task or process.  Passing the
> > > additional parameters, people can now create an iterator to go
> > > through all resources or only the resources of a task.
> > >
> > > Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
> > > ---
> > >  include/linux/bpf.h            |   8 ++
> > >  include/uapi/linux/bpf.h       |  36 +++++++++
> > >  kernel/bpf/task_iter.c         | 134 +++++++++++++++++++++++++++--
> > > ----
> > >  tools/include/uapi/linux/bpf.h |  36 +++++++++
> > >  4 files changed, 190 insertions(+), 24 deletions(-)
> > >
> > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > index 11950029284f..bef81324e5f1 100644
> > > --- a/include/linux/bpf.h
> > > +++ b/include/linux/bpf.h
> > > @@ -1718,6 +1718,14 @@ int bpf_obj_get_user(const char __user
> > > *pathname, int flags);
> > >
> > >  struct bpf_iter_aux_info {
> > >         struct bpf_map *map;
> > > +       struct {
> > > +               enum bpf_iter_task_type type;
> > > +               union {
> > > +                       u32 tid;
> > > +                       u32 tgid;
> > > +                       u32 pid_fd;
> > > +               };
> > > +       } task;
> > >  };
> > >
> > >  typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index ffcbf79a556b..3d0b9e34089f 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -87,10 +87,46 @@ struct bpf_cgroup_storage_key {
> > >         __u32   attach_type;            /* program attach type
> > > (enum bpf_attach_type) */
> > >  };
> > >
> > > +/*
> > > + * The task type of iterators.
> > > + *
> > > + * For BPF task iterators, they can be parameterized with various
> > > + * parameters to visit only some of tasks.
> > > + *
> > > + * BPF_TASK_ITER_ALL (default)
> > > + *     Iterate over resources of every task.
> > > + *
> > > + * BPF_TASK_ITER_TID
> > > + *     Iterate over resources of a task/tid.
> > > + *
> > > + * BPF_TASK_ITER_TGID
> > > + *     Iterate over resources of every task of a process / task
> > > group.
> > > + *
> > > + * BPF_TASK_ITER_PIDFD
> > > + *     Iterate over resources of every task of a process /task
> > > group specified by a pidfd.
> > > + */
> > > +enum bpf_iter_task_type {
> > > +       BPF_TASK_ITER_ALL = 0,
> > > +       BPF_TASK_ITER_TID,
> > > +       BPF_TASK_ITER_TGID,
> > > +       BPF_TASK_ITER_PIDFD,
> > > +};
> > > +
> > >  union bpf_iter_link_info {
> > >         struct {
> > >                 __u32   map_fd;
> > >         } map;
> > > +       /*
> > > +        * Parameters of task iterators.
> > > +        */
> > > +       struct {
> > > +               enum bpf_iter_task_type type;
> > > +               union {
> > > +                       __u32 tid;
> > > +                       __u32 tgid;
> > > +                       __u32 pid_fd;
> > > +               };
> >
> > Sorry I'm late to this discussion, but
> > with enum and with union we kinda tell
> > the kernel the same information twice.
> > Here is how the selftest looks:
> > +       linfo.task.tid = getpid();
> > +       linfo.task.type = BPF_TASK_ITER_TID;
> >
> > first line -> use tid.
> > second line -> yeah. I really meant the tid.
> >
> > Instead of union and type can we do:
> > > +                       __u32 tid;
> > > +                       __u32 tgid;
> > > +                       __u32 pid_fd;
> >
> > as 3 separate fields?
> > The kernel would have to check that only one
> > of them is set.
> >
> > I could have missed an earlier discussion on this subj.
>
> We may have other parameter types later, for example, cgroups.
> Unfortunately, we don't have tagged enum or tagged union, like what
> Rust or Haskell has, in C.  A separated 'type' field would be easier
> and clear to distinguish them.  With 3 separated fields, people may
> wonder if they can be set them all, or just one of them, in my opinion.
> With an union, people should know only one of them should be set.

What stops us adding new fields to the end in such a case?
Some combinations will not be meaningful and the kernel
would have to check and error regardless.
Imagine extending union:
struct {
  enum bpf_iter_task_type type;
  union {
     struct {
        __u32 tid;
        __u64 something_else;
     };
     __u32 tgid;
     __u32 pid_fd;
  };
};

and now we're suddenly hitting the same issue we discussed
with struct bpf_link_info in the other thread due to alignment
increasing from 4 to 8 bytes.
We might even need bpf_iter_link_info _v2.

If 'something_else' is u32 the kernel still needs to check
that it's zero in the tgid and pid_fd cases.
If we're extending fields we can add a comment:
struct {
  __u32 tid;
  __u32 tgid;
  __u32 pid_fd;
  __u32 something_else; /* useful in combination with tid */
};
and it's obvious what is used with what.

It still feels that 3 different fields are easier to use.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v4 1/3] bpf: Parameterize task iterators.
  2022-08-10  1:08       ` Alexei Starovoitov
@ 2022-08-10 17:43         ` Kui-Feng Lee
  0 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2022-08-10 17:43 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: daniel, Kernel Team, Yonghong Song, ast, andrii, bpf

On Tue, 2022-08-09 at 18:08 -0700, Alexei Starovoitov wrote:
> On Tue, Aug 9, 2022 at 3:35 PM Kui-Feng Lee <kuifeng@fb.com> wrote:
> > 
> > On Tue, 2022-08-09 at 15:12 -0700, Alexei Starovoitov wrote:
> > > On Tue, Aug 9, 2022 at 12:54 PM Kui-Feng Lee <kuifeng@fb.com>
> > > wrote:
> > > > 
> > > > Allow creating an iterator that loops through resources of one
> > > > task/thread.
> > > > 
> > > > People could only create iterators to loop through all
> > > > resources of
> > > > files, vma, and tasks in the system, even though they were
> > > > interested
> > > > in only the resources of a specific task or process.  Passing
> > > > the
> > > > additional parameters, people can now create an iterator to go
> > > > through all resources or only the resources of a task.
> > > > 
> > > > Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
> > > > ---
> > > >  include/linux/bpf.h            |   8 ++
> > > >  include/uapi/linux/bpf.h       |  36 +++++++++
> > > >  kernel/bpf/task_iter.c         | 134
> > > > +++++++++++++++++++++++++++--
> > > > ----
> > > >  tools/include/uapi/linux/bpf.h |  36 +++++++++
> > > >  4 files changed, 190 insertions(+), 24 deletions(-)
> > > > 
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index 11950029284f..bef81324e5f1 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -1718,6 +1718,14 @@ int bpf_obj_get_user(const char __user
> > > > *pathname, int flags);
> > > > 
> > > >  struct bpf_iter_aux_info {
> > > >         struct bpf_map *map;
> > > > +       struct {
> > > > +               enum bpf_iter_task_type type;
> > > > +               union {
> > > > +                       u32 tid;
> > > > +                       u32 tgid;
> > > > +                       u32 pid_fd;
> > > > +               };
> > > > +       } task;
> > > >  };
> > > > 
> > > >  typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
> > > > diff --git a/include/uapi/linux/bpf.h
> > > > b/include/uapi/linux/bpf.h
> > > > index ffcbf79a556b..3d0b9e34089f 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -87,10 +87,46 @@ struct bpf_cgroup_storage_key {
> > > >         __u32   attach_type;            /* program attach type
> > > > (enum bpf_attach_type) */
> > > >  };
> > > > 
> > > > +/*
> > > > + * The task type of iterators.
> > > > + *
> > > > + * For BPF task iterators, they can be parameterized with
> > > > various
> > > > + * parameters to visit only some of tasks.
> > > > + *
> > > > + * BPF_TASK_ITER_ALL (default)
> > > > + *     Iterate over resources of every task.
> > > > + *
> > > > + * BPF_TASK_ITER_TID
> > > > + *     Iterate over resources of a task/tid.
> > > > + *
> > > > + * BPF_TASK_ITER_TGID
> > > > + *     Iterate over resources of every task of a process /
> > > > task
> > > > group.
> > > > + *
> > > > + * BPF_TASK_ITER_PIDFD
> > > > + *     Iterate over resources of every task of a process /task
> > > > group specified by a pidfd.
> > > > + */
> > > > +enum bpf_iter_task_type {
> > > > +       BPF_TASK_ITER_ALL = 0,
> > > > +       BPF_TASK_ITER_TID,
> > > > +       BPF_TASK_ITER_TGID,
> > > > +       BPF_TASK_ITER_PIDFD,
> > > > +};
> > > > +
> > > >  union bpf_iter_link_info {
> > > >         struct {
> > > >                 __u32   map_fd;
> > > >         } map;
> > > > +       /*
> > > > +        * Parameters of task iterators.
> > > > +        */
> > > > +       struct {
> > > > +               enum bpf_iter_task_type type;
> > > > +               union {
> > > > +                       __u32 tid;
> > > > +                       __u32 tgid;
> > > > +                       __u32 pid_fd;
> > > > +               };
> > > 
> > > Sorry I'm late to this discussion, but
> > > with enum and with union we kinda tell
> > > the kernel the same information twice.
> > > Here is how the selftest looks:
> > > +       linfo.task.tid = getpid();
> > > +       linfo.task.type = BPF_TASK_ITER_TID;
> > > 
> > > first line -> use tid.
> > > second line -> yeah. I really meant the tid.
> > > 
> > > Instead of union and type can we do:
> > > > +                       __u32 tid;
> > > > +                       __u32 tgid;
> > > > +                       __u32 pid_fd;
> > > 
> > > as 3 separate fields?
> > > The kernel would have to check that only one
> > > of them is set.
> > > 
> > > I could have missed an earlier discussion on this subj.
> > 
> > We may have other parameter types later, for example, cgroups.
> > Unfortunately, we don't have tagged enum or tagged union, like what
> > Rust or Haskell has, in C.  A separated 'type' field would be
> > easier
> > and clear to distinguish them.  With 3 separated fields, people may
> > wonder if they can be set them all, or just one of them, in my
> > opinion.
> > With an union, people should know only one of them should be set.
> 
> What stops us adding new fields to the end in such a case?
> Some combinations will not be meaningful and the kernel
> would have to check and error regardless.
> Imagine extending union:
> struct {
>   enum bpf_iter_task_type type;
>   union {
>      struct {
>         __u32 tid;
>         __u64 something_else;
>      };
>      __u32 tgid;
>      __u32 pid_fd;
>   };
> };
> 
> and now we're suddenly hitting the same issue we discussed
> with struct bpf_link_info in the other thread due to alignment
> increasing from 4 to 8 bytes.
> We might even need bpf_iter_link_info _v2.

It is a good point.  In that case, we probably need task_v2 instead of
bpf_iter_link_info_v2.  The other solution is to make the whole 'task'
a union instead of a struct.

union bpf_iter_link_info {
    ......
    union {
        enum bpf_iter_task_type type;
        struct {
            enum bpf_iter_task_type tid__type;
            __u32 tid;
        };
        struct {
            enum bpf_iter_task_type tgid__type;
            __u32 tgid;
        };
        ......
    } task;
}

Even adding something new, it doesn't affect the offsets of old fields.

> 
> If 'something_else' is u32 the kernel still needs to check
> that it's zero in the tgid and pid_fd cases.
> If we're extending fields we can add a comment:
> struct {
>   __u32 tid;
>   __u32 tgid;
>   __u32 pid_fd;
>   __u32 something_else; /* useful in combination with tid */
> };
> and it's obvious what is used with what.
> 
> It still feels that 3 different fields are easier to use.

Agree!  Having 3 separate fields is easier to use for assigning only
one value.



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-08-10 17:43 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-09 19:54 [PATCH bpf-next v4 0/3] Parameterize task iterators Kui-Feng Lee
2022-08-09 19:54 ` [PATCH bpf-next v4 1/3] bpf: " Kui-Feng Lee
2022-08-09 22:12   ` Alexei Starovoitov
2022-08-09 22:35     ` Kui-Feng Lee
2022-08-10  1:08       ` Alexei Starovoitov
2022-08-10 17:43         ` Kui-Feng Lee
2022-08-09 19:54 ` [PATCH bpf-next v4 2/3] bpf: Handle bpf_link_info for the parameterized task BPF iterators Kui-Feng Lee
2022-08-09 19:54 ` [PATCH bpf-next v4 3/3] selftests/bpf: Test " Kui-Feng Lee

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).