* [PATCH v4 bpf-next 1/4] bpf: introduce task_vma bpf_iter
2021-02-04 20:49 [PATCH v4 bpf-next 0/4] introduce bpf_iter for task_vma Song Liu
@ 2021-02-04 20:49 ` Song Liu
2021-02-08 4:30 ` Yonghong Song
2021-02-04 20:49 ` [PATCH v4 bpf-next 2/4] bpf: allow bpf_d_path in sleepable bpf_iter program Song Liu
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Song Liu @ 2021-02-04 20:49 UTC (permalink / raw)
To: bpf, netdev, linux-mm; +Cc: ast, daniel, kernel-team, akpm, Song Liu
Introduce task_vma bpf_iter to print memory information of a process. It
can be used to print customized information similar to /proc/<pid>/maps.
Current /proc/<pid>/maps and /proc/<pid>/smaps provide information of
vma's of a process. However, these information are not flexible enough to
cover all use cases. For example, if a vma cover mixed 2MB pages and 4kB
pages (x86_64), there is no easy way to tell which address ranges are
backed by 2MB pages. task_vma solves the problem by enabling the user to
generate customize information based on the vma (and vma->vm_mm,
vma->vm_file, etc.).
To access the vma safely in the BPF program, task_vma iterator holds
target mmap_lock while calling the BPF program. If the mmap_lock is
contended, task_vma unlocks mmap_lock between iterations to unblock the
writer(s). This lock contention avoidance mechanism is similar to the one
used in show_smaps_rollup().
Signed-off-by: Song Liu <songliubraving@fb.com>
---
kernel/bpf/task_iter.c | 215 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 214 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 175b7b42bfc46..31e63b6c3d718 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -286,9 +286,196 @@ static const struct seq_operations task_file_seq_ops = {
.show = task_file_seq_show,
};
+struct bpf_iter_seq_task_vma_info {
+ /* The first field must be struct bpf_iter_seq_task_common.
+ * this is assumed by {init, fini}_seq_pidns() callback functions.
+ */
+ struct bpf_iter_seq_task_common common;
+ struct task_struct *task;
+ struct vm_area_struct *vma;
+ u32 tid;
+ unsigned long prev_vm_start;
+ unsigned long prev_vm_end;
+};
+
+enum bpf_task_vma_iter_find_op {
+ task_vma_iter_first_vma, /* use mm->mmap */
+ task_vma_iter_next_vma, /* use curr_vma->vm_next */
+ task_vma_iter_find_vma, /* use find_vma() to find next vma */
+};
+
+static struct vm_area_struct *
+task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
+{
+ struct pid_namespace *ns = info->common.ns;
+ enum bpf_task_vma_iter_find_op op;
+ struct vm_area_struct *curr_vma;
+ struct task_struct *curr_task;
+ u32 curr_tid = info->tid;
+
+ /* If this function returns a non-NULL vma, it holds a reference to
+ * the task_struct, and holds read lock on vma->mm->mmap_lock.
+ * If this function returns NULL, it does not hold any reference or
+ * lock.
+ */
+ if (info->task) {
+ curr_task = info->task;
+ curr_vma = info->vma;
+ /* In case of lock contention, drop mmap_lock to unblock
+ * the writer.
+ */
+ if (mmap_lock_is_contended(curr_task->mm)) {
+ info->prev_vm_start = curr_vma->vm_start;
+ info->prev_vm_end = curr_vma->vm_end;
+ op = task_vma_iter_find_vma;
+ mmap_read_unlock(curr_task->mm);
+ if (mmap_read_lock_killable(curr_task->mm))
+ goto finish;
+ } else {
+ op = task_vma_iter_next_vma;
+ }
+ } else {
+again:
+ curr_task = task_seq_get_next(ns, &curr_tid, true);
+ if (!curr_task) {
+ info->tid = curr_tid + 1;
+ goto finish;
+ }
+
+ if (curr_tid != info->tid) {
+ info->tid = curr_tid;
+ op = task_vma_iter_first_vma;
+ } else {
+ op = task_vma_iter_find_vma;
+ }
+
+ if (!curr_task->mm)
+ goto next_task;
+
+ if (mmap_read_lock_killable(curr_task->mm))
+ goto finish;
+ }
+
+ switch (op) {
+ case task_vma_iter_first_vma:
+ curr_vma = curr_task->mm->mmap;
+ break;
+ case task_vma_iter_next_vma:
+ curr_vma = curr_vma->vm_next;
+ break;
+ case task_vma_iter_find_vma:
+ /* We dropped mmap_lock so it is necessary to use find_vma
+ * to find the next vma. This is similar to the mechanism
+ * in show_smaps_rollup().
+ */
+ curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
+
+ if (curr_vma && (curr_vma->vm_start == info->prev_vm_start))
+ curr_vma = curr_vma->vm_next;
+ break;
+ }
+ if (!curr_vma) {
+ mmap_read_unlock(curr_task->mm);
+ goto next_task;
+ }
+ info->task = curr_task;
+ info->vma = curr_vma;
+ return curr_vma;
+
+next_task:
+ put_task_struct(curr_task);
+ info->task = NULL;
+ curr_tid++;
+ goto again;
+
+finish:
+ info->task = NULL;
+ info->vma = NULL;
+ return NULL;
+}
+
+static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_task_vma_info *info = seq->private;
+ struct vm_area_struct *vma;
+
+ vma = task_vma_seq_get_next(info);
+ if (vma && *pos == 0)
+ ++*pos;
+
+ return vma;
+}
+
+static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_task_vma_info *info = seq->private;
+
+ ++*pos;
+ return task_vma_seq_get_next(info);
+}
+
+struct bpf_iter__task_vma {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct task_struct *, task);
+ __bpf_md_ptr(struct vm_area_struct *, vma);
+};
+
+DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
+ struct task_struct *task, struct vm_area_struct *vma)
+
+static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
+{
+ struct bpf_iter_seq_task_vma_info *info = seq->private;
+ struct bpf_iter__task_vma ctx;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.task = info->task;
+ ctx.vma = info->vma;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int task_vma_seq_show(struct seq_file *seq, void *v)
+{
+ return __task_vma_seq_show(seq, false);
+}
+
+static void task_vma_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_task_vma_info *info = seq->private;
+
+ if (!v) {
+ (void)__task_vma_seq_show(seq, true);
+ } else {
+ /* Set prev_vm_start to ~0UL, so that we don't skip the
+ * vma returned by the next find_vma(). Please refer to
+ * case task_vma_iter_find_vma in task_vma_seq_get_next().
+ */
+ info->prev_vm_start = ~0UL;
+ info->prev_vm_end = info->vma->vm_end;
+ mmap_read_unlock(info->task->mm);
+ put_task_struct(info->task);
+ info->task = NULL;
+ }
+}
+
+static const struct seq_operations task_vma_seq_ops = {
+ .start = task_vma_seq_start,
+ .next = task_vma_seq_next,
+ .stop = task_vma_seq_stop,
+ .show = task_vma_seq_show,
+};
+
BTF_ID_LIST(btf_task_file_ids)
BTF_ID(struct, task_struct)
BTF_ID(struct, file)
+BTF_ID(struct, vm_area_struct)
static const struct bpf_iter_seq_info task_seq_info = {
.seq_ops = &task_seq_ops,
@@ -328,6 +515,26 @@ static struct bpf_iter_reg task_file_reg_info = {
.seq_info = &task_file_seq_info,
};
+static const struct bpf_iter_seq_info task_vma_seq_info = {
+ .seq_ops = &task_vma_seq_ops,
+ .init_seq_private = init_seq_pidns,
+ .fini_seq_private = fini_seq_pidns,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_task_vma_info),
+};
+
+static struct bpf_iter_reg task_vma_reg_info = {
+ .target = "task_vma",
+ .feature = BPF_ITER_RESCHED,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__task_vma, task),
+ PTR_TO_BTF_ID_OR_NULL },
+ { offsetof(struct bpf_iter__task_vma, vma),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &task_vma_seq_info,
+};
+
static int __init task_iter_init(void)
{
int ret;
@@ -339,6 +546,12 @@ static int __init task_iter_init(void)
task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
- return bpf_iter_reg_target(&task_file_reg_info);
+ ret = bpf_iter_reg_target(&task_file_reg_info);
+ if (ret)
+ return ret;
+
+ task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
+ task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[2];
+ return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);
--
2.24.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v4 bpf-next 1/4] bpf: introduce task_vma bpf_iter
2021-02-04 20:49 ` [PATCH v4 bpf-next 1/4] bpf: introduce task_vma bpf_iter Song Liu
@ 2021-02-08 4:30 ` Yonghong Song
0 siblings, 0 replies; 7+ messages in thread
From: Yonghong Song @ 2021-02-08 4:30 UTC (permalink / raw)
To: Song Liu, bpf, netdev, linux-mm; +Cc: ast, daniel, kernel-team, akpm
On 2/4/21 12:49 PM, Song Liu wrote:
> Introduce task_vma bpf_iter to print memory information of a process. It
> can be used to print customized information similar to /proc/<pid>/maps.
>
> Current /proc/<pid>/maps and /proc/<pid>/smaps provide information of
> vma's of a process. However, these information are not flexible enough to
> cover all use cases. For example, if a vma cover mixed 2MB pages and 4kB
> pages (x86_64), there is no easy way to tell which address ranges are
> backed by 2MB pages. task_vma solves the problem by enabling the user to
> generate customize information based on the vma (and vma->vm_mm,
> vma->vm_file, etc.).
>
> To access the vma safely in the BPF program, task_vma iterator holds
> target mmap_lock while calling the BPF program. If the mmap_lock is
> contended, task_vma unlocks mmap_lock between iterations to unblock the
> writer(s). This lock contention avoidance mechanism is similar to the one
> used in show_smaps_rollup().
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
> kernel/bpf/task_iter.c | 215 ++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 214 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
> index 175b7b42bfc46..31e63b6c3d718 100644
> --- a/kernel/bpf/task_iter.c
> +++ b/kernel/bpf/task_iter.c
> @@ -286,9 +286,196 @@ static const struct seq_operations task_file_seq_ops = {
> .show = task_file_seq_show,
> };
>
> +struct bpf_iter_seq_task_vma_info {
> + /* The first field must be struct bpf_iter_seq_task_common.
> + * this is assumed by {init, fini}_seq_pidns() callback functions.
> + */
> + struct bpf_iter_seq_task_common common;
> + struct task_struct *task;
> + struct vm_area_struct *vma;
> + u32 tid;
> + unsigned long prev_vm_start;
> + unsigned long prev_vm_end;
> +};
> +
> +enum bpf_task_vma_iter_find_op {
> + task_vma_iter_first_vma, /* use mm->mmap */
> + task_vma_iter_next_vma, /* use curr_vma->vm_next */
> + task_vma_iter_find_vma, /* use find_vma() to find next vma */
> +};
> +
> +static struct vm_area_struct *
> +task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
> +{
> + struct pid_namespace *ns = info->common.ns;
> + enum bpf_task_vma_iter_find_op op;
> + struct vm_area_struct *curr_vma;
> + struct task_struct *curr_task;
> + u32 curr_tid = info->tid;
> +
> + /* If this function returns a non-NULL vma, it holds a reference to
> + * the task_struct, and holds read lock on vma->mm->mmap_lock.
> + * If this function returns NULL, it does not hold any reference or
> + * lock.
> + */
> + if (info->task) {
> + curr_task = info->task;
> + curr_vma = info->vma;
> + /* In case of lock contention, drop mmap_lock to unblock
> + * the writer.
> + */
> + if (mmap_lock_is_contended(curr_task->mm)) {
> + info->prev_vm_start = curr_vma->vm_start;
> + info->prev_vm_end = curr_vma->vm_end;
> + op = task_vma_iter_find_vma;
> + mmap_read_unlock(curr_task->mm);
> + if (mmap_read_lock_killable(curr_task->mm))
> + goto finish;
> + } else {
> + op = task_vma_iter_next_vma;
> + }
> + } else {
> +again:
> + curr_task = task_seq_get_next(ns, &curr_tid, true);
> + if (!curr_task) {
> + info->tid = curr_tid + 1;
> + goto finish;
> + }
> +
> + if (curr_tid != info->tid) {
> + info->tid = curr_tid;
> + op = task_vma_iter_first_vma;
> + } else {
> + op = task_vma_iter_find_vma;
> + }
> +
> + if (!curr_task->mm)
> + goto next_task;
> +
> + if (mmap_read_lock_killable(curr_task->mm))
> + goto finish;
We hold a reference for curr_task here.
Going to "finish" does not release the reference.
> + }
> +
> + switch (op) {
> + case task_vma_iter_first_vma:
> + curr_vma = curr_task->mm->mmap;
> + break;
> + case task_vma_iter_next_vma:
> + curr_vma = curr_vma->vm_next;
> + break;
> + case task_vma_iter_find_vma:
> + /* We dropped mmap_lock so it is necessary to use find_vma
> + * to find the next vma. This is similar to the mechanism
> + * in show_smaps_rollup().
> + */
> + curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
> +
> + if (curr_vma && (curr_vma->vm_start == info->prev_vm_start))
> + curr_vma = curr_vma->vm_next;
> + break;
> + }
> + if (!curr_vma) {
> + mmap_read_unlock(curr_task->mm);
> + goto next_task;
> + }
> + info->task = curr_task;
> + info->vma = curr_vma;
> + return curr_vma;
> +
> +next_task:
> + put_task_struct(curr_task);
> + info->task = NULL;
> + curr_tid++;
> + goto again;
> +
> +finish:
> + info->task = NULL;
> + info->vma = NULL;
> + return NULL;
> +}
> +
[...]
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v4 bpf-next 2/4] bpf: allow bpf_d_path in sleepable bpf_iter program
2021-02-04 20:49 [PATCH v4 bpf-next 0/4] introduce bpf_iter for task_vma Song Liu
2021-02-04 20:49 ` [PATCH v4 bpf-next 1/4] bpf: introduce task_vma bpf_iter Song Liu
@ 2021-02-04 20:49 ` Song Liu
2021-02-04 20:50 ` [PATCH v4 bpf-next 3/4] libbpf: introduce section "iter.s/" for " Song Liu
2021-02-04 20:50 ` [PATCH v4 bpf-next 4/4] selftests/bpf: add test for bpf_iter_task_vma Song Liu
3 siblings, 0 replies; 7+ messages in thread
From: Song Liu @ 2021-02-04 20:49 UTC (permalink / raw)
To: bpf, netdev, linux-mm
Cc: ast, daniel, kernel-team, akpm, Song Liu, Yonghong Song
task_file and task_vma iter programs have access to file->f_path. Enable
bpf_d_path to print paths of these file.
bpf_iter programs are generally called in sleepable context. However, it
is still necessary to diffientiate sleepable and non-sleepable bpf_iter
programs: sleepable programs have access to bpf_d_path; non-sleepable
programs have access to bpf_spin_lock.
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
kernel/trace/bpf_trace.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6c0018abe68a0..1f3becd4435ae 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1191,6 +1191,11 @@ BTF_SET_END(btf_allowlist_d_path)
static bool bpf_d_path_allowed(const struct bpf_prog *prog)
{
+ if (prog->type == BPF_PROG_TYPE_TRACING &&
+ prog->expected_attach_type == BPF_TRACE_ITER &&
+ prog->aux->sleepable)
+ return true;
+
if (prog->type == BPF_PROG_TYPE_LSM)
return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
--
2.24.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 bpf-next 3/4] libbpf: introduce section "iter.s/" for sleepable bpf_iter program
2021-02-04 20:49 [PATCH v4 bpf-next 0/4] introduce bpf_iter for task_vma Song Liu
2021-02-04 20:49 ` [PATCH v4 bpf-next 1/4] bpf: introduce task_vma bpf_iter Song Liu
2021-02-04 20:49 ` [PATCH v4 bpf-next 2/4] bpf: allow bpf_d_path in sleepable bpf_iter program Song Liu
@ 2021-02-04 20:50 ` Song Liu
2021-02-04 20:50 ` [PATCH v4 bpf-next 4/4] selftests/bpf: add test for bpf_iter_task_vma Song Liu
3 siblings, 0 replies; 7+ messages in thread
From: Song Liu @ 2021-02-04 20:50 UTC (permalink / raw)
To: bpf, netdev, linux-mm
Cc: ast, daniel, kernel-team, akpm, Song Liu, Yonghong Song, KP Singh
Sleepable iterator program have access to helper functions like bpf_d_path.
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/lib/bpf/libbpf.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2abbc38005684..903ccd7e93206 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8555,6 +8555,11 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_ITER,
.is_attach_btf = true,
.attach_fn = attach_iter),
+ SEC_DEF("iter.s/", TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .is_attach_btf = true,
+ .is_sleepable = true,
+ .attach_fn = attach_iter),
BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP,
BPF_XDP_DEVMAP),
BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
--
2.24.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 bpf-next 4/4] selftests/bpf: add test for bpf_iter_task_vma
2021-02-04 20:49 [PATCH v4 bpf-next 0/4] introduce bpf_iter for task_vma Song Liu
` (2 preceding siblings ...)
2021-02-04 20:50 ` [PATCH v4 bpf-next 3/4] libbpf: introduce section "iter.s/" for " Song Liu
@ 2021-02-04 20:50 ` Song Liu
2021-02-08 4:38 ` Yonghong Song
3 siblings, 1 reply; 7+ messages in thread
From: Song Liu @ 2021-02-04 20:50 UTC (permalink / raw)
To: bpf, netdev, linux-mm; +Cc: ast, daniel, kernel-team, akpm, Song Liu
The test dumps information similar to /proc/pid/maps. The first line of
the output is compared against the /proc file to make sure they match.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
.../selftests/bpf/prog_tests/bpf_iter.c | 114 ++++++++++++++++--
tools/testing/selftests/bpf/progs/bpf_iter.h | 8 ++
.../selftests/bpf/progs/bpf_iter_task_vma.c | 58 +++++++++
3 files changed, 170 insertions(+), 10 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 0e586368948dd..9e8027685e859 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -7,6 +7,7 @@
#include "bpf_iter_task.skel.h"
#include "bpf_iter_task_stack.skel.h"
#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_task_vma.skel.h"
#include "bpf_iter_task_btf.skel.h"
#include "bpf_iter_tcp4.skel.h"
#include "bpf_iter_tcp6.skel.h"
@@ -64,6 +65,22 @@ static void do_dummy_read(struct bpf_program *prog)
bpf_link__destroy(link);
}
+static int read_fd_into_buffer(int fd, char *buf, int size)
+{
+ int bufleft = size;
+ int len;
+
+ do {
+ len = read(fd, buf, bufleft);
+ if (len > 0) {
+ buf += len;
+ bufleft -= len;
+ }
+ } while (len > 0);
+
+ return len < 0 ? len : size - bufleft;
+}
+
static void test_ipv6_route(void)
{
struct bpf_iter_ipv6_route *skel;
@@ -177,7 +194,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
{
struct bpf_program *prog = skel->progs.dump_task_struct;
struct bpf_iter_task_btf__bss *bss = skel->bss;
- int iter_fd = -1, len = 0, bufleft = TASKBUFSZ;
+ int iter_fd = -1, err;
struct bpf_link *link;
char *buf = taskbuf;
int ret = 0;
@@ -190,14 +207,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
goto free_link;
- do {
- len = read(iter_fd, buf, bufleft);
- if (len > 0) {
- buf += len;
- bufleft -= len;
- }
- } while (len > 0);
-
+ err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ);
if (bss->skip) {
printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
ret = 1;
@@ -205,7 +215,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
goto free_link;
}
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
goto free_link;
CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
@@ -1133,6 +1143,88 @@ static void test_buf_neg_offset(void)
bpf_iter_test_kern6__destroy(skel);
}
+#define CMP_BUFFER_SIZE 1024
+static char task_vma_output[CMP_BUFFER_SIZE];
+static char proc_maps_output[CMP_BUFFER_SIZE];
+
+/* remove \0 and \t from str, and only keep the first line */
+static void str_strip_first_line(char *str)
+{
+ char *dst = str, *src = str;
+
+ do {
+ if (*src == ' ' || *src == '\t')
+ src++;
+ else
+ *(dst++) = *(src++);
+
+ } while (*src != '\0' && *src != '\n');
+
+ *dst = '\0';
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static void test_task_vma(void)
+{
+ int err, iter_fd = -1, proc_maps_fd = -1;
+ struct bpf_iter_task_vma *skel;
+ int len, read_size = 4;
+ char maps_path[64];
+
+ skel = bpf_iter_task_vma__open();
+ if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open failed\n"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = bpf_iter_task_vma__load(skel);
+ if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n"))
+ goto out;
+
+ skel->links.proc_maps = bpf_program__attach_iter(
+ skel->progs.proc_maps, NULL);
+
+ if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
+ "attach iterator failed\n"))
+ goto out;
+
+ /* read CMP_BUFFER_SIZE (1kB) from bpf_iter */
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto out;
+
+ /* read in small chunks to trigger seq_file corner cases */
+ len = 0;
+ while (len < CMP_BUFFER_SIZE) {
+ err = read_fd_into_buffer(iter_fd, task_vma_output + len,
+ min(read_size, CMP_BUFFER_SIZE - len));
+ if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
+ goto out;
+ len += err;
+ }
+
+ /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */
+ snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid);
+ proc_maps_fd = open(maps_path, O_RDONLY);
+ if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n"))
+ goto out;
+ err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE);
+ if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n"))
+ goto out;
+
+ /* strip and compare the first line of the two files */
+ str_strip_first_line(task_vma_output);
+ str_strip_first_line(proc_maps_output);
+
+ CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output",
+ "found mismatch\n");
+out:
+ close(proc_maps_fd);
+ close(iter_fd);
+ bpf_iter_task_vma__destroy(skel);
+}
+
void test_bpf_iter(void)
{
if (test__start_subtest("btf_id_or_null"))
@@ -1149,6 +1241,8 @@ void test_bpf_iter(void)
test_task_stack();
if (test__start_subtest("task_file"))
test_task_file();
+ if (test__start_subtest("task_vma"))
+ test_task_vma();
if (test__start_subtest("task_btf"))
test_task_btf();
if (test__start_subtest("tcp4"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 6a1255465fd6d..3d83b185c4bcb 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -7,6 +7,7 @@
#define bpf_iter__netlink bpf_iter__netlink___not_used
#define bpf_iter__task bpf_iter__task___not_used
#define bpf_iter__task_file bpf_iter__task_file___not_used
+#define bpf_iter__task_vma bpf_iter__task_vma___not_used
#define bpf_iter__tcp bpf_iter__tcp___not_used
#define tcp6_sock tcp6_sock___not_used
#define bpf_iter__udp bpf_iter__udp___not_used
@@ -26,6 +27,7 @@
#undef bpf_iter__netlink
#undef bpf_iter__task
#undef bpf_iter__task_file
+#undef bpf_iter__task_vma
#undef bpf_iter__tcp
#undef tcp6_sock
#undef bpf_iter__udp
@@ -67,6 +69,12 @@ struct bpf_iter__task_file {
struct file *file;
} __attribute__((preserve_access_index));
+struct bpf_iter__task_vma {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+ struct vm_area_struct *vma;
+} __attribute__((preserve_access_index));
+
struct bpf_iter__bpf_map {
struct bpf_iter_meta *meta;
struct bpf_map *map;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
new file mode 100644
index 0000000000000..d789e32cdb16c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Copied from mm.h */
+#define VM_READ 0x00000001
+#define VM_WRITE 0x00000002
+#define VM_EXEC 0x00000004
+#define VM_MAYSHARE 0x00000080
+
+/* Copied from kdev_t.h */
+#define MINORBITS 20
+#define MINORMASK ((1U << MINORBITS) - 1)
+#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+
+#define D_PATH_BUF_SIZE 1024
+char d_path_buf[D_PATH_BUF_SIZE] = {};
+__u32 pid = 0;
+
+SEC("iter.s/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
+{
+ struct vm_area_struct *vma = ctx->vma;
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ struct file *file;
+ char perm_str[] = "----";
+
+ if (task == (void *)0 || vma == (void *)0)
+ return 0;
+
+ file = vma->vm_file;
+ if (task->tgid != pid)
+ return 0;
+ perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-';
+ perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-';
+ perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-';
+ perm_str[3] = (vma->vm_flags & VM_MAYSHARE) ? 's' : 'p';
+ BPF_SEQ_PRINTF(seq, "%08llx-%08llx %s ", vma->vm_start, vma->vm_end, perm_str);
+
+ if (file) {
+ __u32 dev = file->f_inode->i_sb->s_dev;
+
+ bpf_d_path(&file->f_path, d_path_buf, D_PATH_BUF_SIZE);
+
+ BPF_SEQ_PRINTF(seq, "%08llx ", vma->vm_pgoff << 12);
+ BPF_SEQ_PRINTF(seq, "%02x:%02x %u", MAJOR(dev), MINOR(dev),
+ file->f_inode->i_ino);
+ BPF_SEQ_PRINTF(seq, "\t%s\n", d_path_buf);
+ } else {
+ BPF_SEQ_PRINTF(seq, "%08llx 00:00 0\n", 0ULL);
+ }
+ return 0;
+}
--
2.24.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v4 bpf-next 4/4] selftests/bpf: add test for bpf_iter_task_vma
2021-02-04 20:50 ` [PATCH v4 bpf-next 4/4] selftests/bpf: add test for bpf_iter_task_vma Song Liu
@ 2021-02-08 4:38 ` Yonghong Song
0 siblings, 0 replies; 7+ messages in thread
From: Yonghong Song @ 2021-02-08 4:38 UTC (permalink / raw)
To: Song Liu, bpf, netdev, linux-mm; +Cc: ast, daniel, kernel-team, akpm
On 2/4/21 12:50 PM, Song Liu wrote:
> The test dumps information similar to /proc/pid/maps. The first line of
> the output is compared against the /proc file to make sure they match.
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
> .../selftests/bpf/prog_tests/bpf_iter.c | 114 ++++++++++++++++--
> tools/testing/selftests/bpf/progs/bpf_iter.h | 8 ++
> .../selftests/bpf/progs/bpf_iter_task_vma.c | 58 +++++++++
> 3 files changed, 170 insertions(+), 10 deletions(-)
> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 0e586368948dd..9e8027685e859 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -7,6 +7,7 @@
> #include "bpf_iter_task.skel.h"
> #include "bpf_iter_task_stack.skel.h"
> #include "bpf_iter_task_file.skel.h"
> +#include "bpf_iter_task_vma.skel.h"
> #include "bpf_iter_task_btf.skel.h"
> #include "bpf_iter_tcp4.skel.h"
> #include "bpf_iter_tcp6.skel.h"
> @@ -64,6 +65,22 @@ static void do_dummy_read(struct bpf_program *prog)
> bpf_link__destroy(link);
> }
>
> +static int read_fd_into_buffer(int fd, char *buf, int size)
> +{
> + int bufleft = size;
> + int len;
> +
> + do {
> + len = read(fd, buf, bufleft);
> + if (len > 0) {
> + buf += len;
> + bufleft -= len;
> + }
> + } while (len > 0);
> +
> + return len < 0 ? len : size - bufleft;
> +}
> +
> static void test_ipv6_route(void)
> {
> struct bpf_iter_ipv6_route *skel;
> @@ -177,7 +194,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
> {
> struct bpf_program *prog = skel->progs.dump_task_struct;
> struct bpf_iter_task_btf__bss *bss = skel->bss;
> - int iter_fd = -1, len = 0, bufleft = TASKBUFSZ;
> + int iter_fd = -1, err;
> struct bpf_link *link;
> char *buf = taskbuf;
> int ret = 0;
> @@ -190,14 +207,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
> if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
> goto free_link;
>
> - do {
> - len = read(iter_fd, buf, bufleft);
> - if (len > 0) {
> - buf += len;
> - bufleft -= len;
> - }
> - } while (len > 0);
> -
> + err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ);
> if (bss->skip) {
> printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
> ret = 1;
> @@ -205,7 +215,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
> goto free_link;
> }
>
> - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
> + if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
> goto free_link;
>
> CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
> @@ -1133,6 +1143,88 @@ static void test_buf_neg_offset(void)
> bpf_iter_test_kern6__destroy(skel);
> }
>
> +#define CMP_BUFFER_SIZE 1024
> +static char task_vma_output[CMP_BUFFER_SIZE];
> +static char proc_maps_output[CMP_BUFFER_SIZE];
> +
> +/* remove \0 and \t from str, and only keep the first line */
> +static void str_strip_first_line(char *str)
> +{
> + char *dst = str, *src = str;
> +
> + do {
> + if (*src == ' ' || *src == '\t')
> + src++;
> + else
> + *(dst++) = *(src++);
> +
> + } while (*src != '\0' && *src != '\n');
> +
> + *dst = '\0';
> +}
> +
> +#define min(a, b) ((a) < (b) ? (a) : (b))
> +
> +static void test_task_vma(void)
> +{
> + int err, iter_fd = -1, proc_maps_fd = -1;
> + struct bpf_iter_task_vma *skel;
> + int len, read_size = 4;
> + char maps_path[64];
> +
> + skel = bpf_iter_task_vma__open();
> + if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open failed\n"))
> + return;
> +
> + skel->bss->pid = getpid();
> +
> + err = bpf_iter_task_vma__load(skel);
> + if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n"))
> + goto out;
> +
> + skel->links.proc_maps = bpf_program__attach_iter(
> + skel->progs.proc_maps, NULL);
> +
> + if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
> + "attach iterator failed\n"))
> + goto out;
You need to set skel->links.proc_maps = NULL before goto out. Otherwise,
libbpf may segfault.
> +
> + /* read CMP_BUFFER_SIZE (1kB) from bpf_iter */
This comment better right before
/* read in small chunks to trigger seq_file corner cases */
> + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
> + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
> + goto out;
> +
> + /* read in small chunks to trigger seq_file corner cases */
> + len = 0;
> + while (len < CMP_BUFFER_SIZE) {
Some comment will be better to assure that we won't trigger infinite
loop here, e.g., err == 0 case.
> + err = read_fd_into_buffer(iter_fd, task_vma_output + len,
> + min(read_size, CMP_BUFFER_SIZE - len));
> + if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
> + goto out;
> + len += err;
> + }
> +
> + /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */
> + snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid);
> + proc_maps_fd = open(maps_path, O_RDONLY);
> + if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n"))
> + goto out;
> + err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE);
> + if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n"))
> + goto out;
> +
> + /* strip and compare the first line of the two files */
> + str_strip_first_line(task_vma_output);
> + str_strip_first_line(proc_maps_output);
> +
> + CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output",
> + "found mismatch\n");
> +out:
> + close(proc_maps_fd);
> + close(iter_fd);
> + bpf_iter_task_vma__destroy(skel);
> +}
> +
> void test_bpf_iter(void)
> {
> if (test__start_subtest("btf_id_or_null"))
> @@ -1149,6 +1241,8 @@ void test_bpf_iter(void)
> test_task_stack();
> if (test__start_subtest("task_file"))
> test_task_file();
> + if (test__start_subtest("task_vma"))
> + test_task_vma();
> if (test__start_subtest("task_btf"))
> test_task_btf();
> if (test__start_subtest("tcp4"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
> index 6a1255465fd6d..3d83b185c4bcb 100644
> --- a/tools/testing/selftests/bpf/progs/bpf_iter.h
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
[...]
^ permalink raw reply [flat|nested] 7+ messages in thread