All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
To: Hao Luo <haoluo@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>, KP Singh <kpsingh@kernel.org>,
	Shakeel Butt <shakeelb@google.com>,
	Joe Burton <jevburton.kernel@gmail.com>,
	Tejun Heo <tj@kernel.org>,
	joshdon@google.com, sdf@google.com, bpf@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH bpf-next v1 8/9] bpf: Introduce cgroup iter
Date: Thu, 3 Mar 2022 04:15:06 +0530	[thread overview]
Message-ID: <20220302224506.jc7jwkdaatukicik@apollo.legion> (raw)
In-Reply-To: <20220225234339.2386398-9-haoluo@google.com>

On Sat, Feb 26, 2022 at 05:13:38AM IST, Hao Luo wrote:
> Introduce a new type of iter prog: cgroup. Unlike other bpf_iter, this
> iter doesn't iterate a set of kernel objects. Instead, it is supposed to
> be parameterized by a cgroup id and prints only that cgroup. So one
> needs to specify a target cgroup id when attaching this iter.
>
> The target cgroup's state can be read out via a link of this iter.
> Typically, we can monitor cgroup creation and deletion using sleepable
> tracing and use it to create corresponding directories in bpffs and pin
> a cgroup id parameterized link in the directory. Then we can read the
> auto-pinned iter link to get cgroup's state. The output of the iter link
> is determined by the program. See the selftest test_cgroup_stats.c for
> an example.
>
> Signed-off-by: Hao Luo <haoluo@google.com>
> ---
>  include/linux/bpf.h            |   1 +
>  include/uapi/linux/bpf.h       |   6 ++
>  kernel/bpf/Makefile            |   2 +-
>  kernel/bpf/cgroup_iter.c       | 141 +++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |   6 ++
>  5 files changed, 155 insertions(+), 1 deletion(-)
>  create mode 100644 kernel/bpf/cgroup_iter.c
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 759ade7b24b3..3ce9b0b7ed89 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1595,6 +1595,7 @@ int bpf_obj_get_path(bpfptr_t pathname, int flags);
>
>  struct bpf_iter_aux_info {
>  	struct bpf_map *map;
> +	u64 cgroup_id;
>  };
>
>  typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index a5dbc794403d..855ad80d9983 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -91,6 +91,9 @@ union bpf_iter_link_info {
>  	struct {
>  		__u32	map_fd;
>  	} map;
> +	struct {
> +		__u64	cgroup_id;
> +	} cgroup;
>  };
>
>  /* BPF syscall commands, see bpf(2) man-page for more details. */
> @@ -5887,6 +5890,9 @@ struct bpf_link_info {
>  				struct {
>  					__u32 map_id;
>  				} map;
> +				struct {
> +					__u64 cgroup_id;
> +				} cgroup;
>  			};
>  		} iter;
>  		struct  {
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index c1a9be6a4b9f..52a0e4c6e96e 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -8,7 +8,7 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
>
>  obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
>  obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
> -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
> +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o cgroup_iter.o
>  obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
>  obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
>  obj-$(CONFIG_BPF_SYSCALL) += disasm.o
> diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
> new file mode 100644
> index 000000000000..011d9dcd1d51
> --- /dev/null
> +++ b/kernel/bpf/cgroup_iter.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2022 Google */
> +#include <linux/bpf.h>
> +#include <linux/btf_ids.h>
> +#include <linux/cgroup.h>
> +#include <linux/kernel.h>
> +#include <linux/seq_file.h>
> +
> +struct bpf_iter__cgroup {
> +	__bpf_md_ptr(struct bpf_iter_meta *, meta);
> +	__bpf_md_ptr(struct cgroup *, cgroup);
> +};
> +
> +static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
> +{
> +	struct cgroup *cgroup;
> +	u64 cgroup_id;
> +
> +	/* Only one session is supported. */
> +	if (*pos > 0)
> +		return NULL;
> +
> +	cgroup_id = *(u64 *)seq->private;
> +	cgroup = cgroup_get_from_id(cgroup_id);
> +	if (!cgroup)
> +		return NULL;
> +
> +	if (*pos == 0)
> +		++*pos;
> +
> +	return cgroup;
> +}
> +
> +static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> +{
> +	++*pos;
> +	return NULL;
> +}
> +
> +static int cgroup_iter_seq_show(struct seq_file *seq, void *v)
> +{
> +	struct bpf_iter__cgroup ctx;
> +	struct bpf_iter_meta meta;
> +	struct bpf_prog *prog;
> +	int ret = 0;
> +
> +	ctx.meta = &meta;
> +	ctx.cgroup = v;
> +	meta.seq = seq;
> +	prog = bpf_iter_get_info(&meta, false);
> +	if (prog)
> +		ret = bpf_iter_run_prog(prog, &ctx);
> +
> +	return ret;
> +}
> +
> +static void cgroup_iter_seq_stop(struct seq_file *seq, void *v)
> +{
> +	if (v)
> +		cgroup_put(v);
> +}

I think in existing iterators, we make a final call to seq_show, with v as NULL,
is there a specific reason to do it differently for this? There is logic in
bpf_iter.c to trigger ->stop() callback again when ->start() or ->next() returns
NULL, to execute BPF program with NULL p, see the comment above stop label.

If you do add the seq_show call with NULL, you'd also need to change the
ctx_arg_info PTR_TO_BTF_ID to PTR_TO_BTF_ID_OR_NULL.

> +
> +static const struct seq_operations cgroup_iter_seq_ops = {
> +	.start  = cgroup_iter_seq_start,
> +	.next   = cgroup_iter_seq_next,
> +	.stop   = cgroup_iter_seq_stop,
> +	.show   = cgroup_iter_seq_show,
> +};
> +
> +BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup)
> +
> +static int cgroup_iter_seq_init(void *priv_data, struct bpf_iter_aux_info *aux)
> +{
> +	*(u64 *)priv_data = aux->cgroup_id;
> +	return 0;
> +}
> +
> +static void cgroup_iter_seq_fini(void *priv_data)
> +{
> +}
> +
> +static const struct bpf_iter_seq_info cgroup_iter_seq_info = {
> +	.seq_ops                = &cgroup_iter_seq_ops,
> +	.init_seq_private       = cgroup_iter_seq_init,
> +	.fini_seq_private       = cgroup_iter_seq_fini,
> +	.seq_priv_size          = sizeof(u64),
> +};
> +
> +static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
> +				  union bpf_iter_link_info *linfo,
> +				  struct bpf_iter_aux_info *aux)
> +{
> +	aux->cgroup_id = linfo->cgroup.cgroup_id;
> +	return 0;
> +}
> +
> +static void bpf_iter_detach_cgroup(struct bpf_iter_aux_info *aux)
> +{
> +}
> +
> +void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux,
> +				 struct seq_file *seq)
> +{
> +	char buf[64] = {0};
> +
> +	cgroup_path_from_kernfs_id(aux->cgroup_id, buf, sizeof(buf));
> +	seq_printf(seq, "cgroup_id:\t%lu\n", aux->cgroup_id);
> +	seq_printf(seq, "cgroup_path:\t%s\n", buf);
> +}
> +
> +int bpf_iter_cgroup_fill_link_info(const struct bpf_iter_aux_info *aux,
> +				   struct bpf_link_info *info)
> +{
> +	info->iter.cgroup.cgroup_id = aux->cgroup_id;
> +	return 0;
> +}
> +
> +DEFINE_BPF_ITER_FUNC(cgroup, struct bpf_iter_meta *meta,
> +		     struct cgroup *cgroup)
> +
> +static struct bpf_iter_reg bpf_cgroup_reg_info = {
> +	.target			= "cgroup",
> +	.attach_target		= bpf_iter_attach_cgroup,
> +	.detach_target		= bpf_iter_detach_cgroup,
> +	.show_fdinfo		= bpf_iter_cgroup_show_fdinfo,
> +	.fill_link_info		= bpf_iter_cgroup_fill_link_info,
> +	.ctx_arg_info_size	= 1,
> +	.ctx_arg_info		= {
> +		{ offsetof(struct bpf_iter__cgroup, cgroup),
> +		  PTR_TO_BTF_ID },
> +	},
> +	.seq_info		= &cgroup_iter_seq_info,
> +};
> +
> +static int __init bpf_cgroup_iter_init(void)
> +{
> +	bpf_cgroup_reg_info.ctx_arg_info[0].btf_id = bpf_cgroup_btf_id[0];
> +	return bpf_iter_reg_target(&bpf_cgroup_reg_info);
> +}
> +
> +late_initcall(bpf_cgroup_iter_init);
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index a5dbc794403d..855ad80d9983 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -91,6 +91,9 @@ union bpf_iter_link_info {
>  	struct {
>  		__u32	map_fd;
>  	} map;
> +	struct {
> +		__u64	cgroup_id;
> +	} cgroup;
>  };
>
>  /* BPF syscall commands, see bpf(2) man-page for more details. */
> @@ -5887,6 +5890,9 @@ struct bpf_link_info {
>  				struct {
>  					__u32 map_id;
>  				} map;
> +				struct {
> +					__u64 cgroup_id;
> +				} cgroup;
>  			};
>  		} iter;
>  		struct  {
> --
> 2.35.1.574.g5d30c73bfb-goog
>

--
Kartikeya

  parent reply	other threads:[~2022-03-02 22:45 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-25 23:43 [PATCH bpf-next v1 0/9] Extend cgroup interface with bpf Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 1/9] bpf: Add mkdir, rmdir, unlink syscalls for prog_bpf_syscall Hao Luo
2022-02-27  5:18   ` Kumar Kartikeya Dwivedi
2022-02-28 22:10     ` Hao Luo
2022-03-02 19:34       ` Alexei Starovoitov
2022-03-03 18:50         ` Hao Luo
2022-03-04 18:37           ` Hao Luo
2022-03-05 23:47             ` Alexei Starovoitov
2022-03-08 21:08               ` Hao Luo
2022-03-02 20:55   ` Yonghong Song
2022-03-03 18:56     ` Hao Luo
2022-03-03 19:13       ` Yonghong Song
2022-03-03 19:15         ` Hao Luo
2022-03-12  3:46   ` Al Viro
2022-03-14 17:07     ` Hao Luo
2022-03-14 23:10       ` Al Viro
2022-03-15 17:27         ` Hao Luo
2022-03-15 18:59           ` Alexei Starovoitov
2022-03-15 19:03             ` Alexei Starovoitov
2022-03-15 19:00           ` Al Viro
2022-03-15 19:47             ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 2/9] bpf: Add BPF_OBJ_PIN and BPF_OBJ_GET in the bpf_sys_bpf helper Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 3/9] selftests/bpf: tests mkdir, rmdir, unlink and pin in syscall Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 4/9] bpf: Introduce sleepable tracepoints Hao Luo
2022-03-02 19:41   ` Alexei Starovoitov
2022-03-03 19:37     ` Hao Luo
2022-03-03 19:59       ` Alexei Starovoitov
2022-03-02 21:23   ` Yonghong Song
2022-03-02 21:30     ` Alexei Starovoitov
2022-03-03  1:08       ` Yonghong Song
2022-03-03  2:29         ` Alexei Starovoitov
2022-03-03 19:43           ` Hao Luo
2022-03-03 20:02             ` Alexei Starovoitov
2022-03-03 20:04               ` Alexei Starovoitov
2022-03-03 22:06                 ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 5/9] cgroup: Sleepable cgroup tracepoints Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 6/9] libbpf: Add sleepable tp_btf Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 7/9] bpf: Lift permission check in __sys_bpf when called from kernel Hao Luo
2022-03-02 20:01   ` Alexei Starovoitov
2022-03-03 19:14     ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 8/9] bpf: Introduce cgroup iter Hao Luo
2022-02-26  2:32   ` kernel test robot
2022-02-26  2:32   ` kernel test robot
2022-02-26  2:53   ` kernel test robot
2022-03-02 21:59   ` Yonghong Song
2022-03-03 20:02     ` Hao Luo
2022-03-02 22:45   ` Kumar Kartikeya Dwivedi [this message]
2022-03-03  2:03     ` Yonghong Song
2022-03-03  3:03       ` Kumar Kartikeya Dwivedi
2022-03-03  4:00         ` Alexei Starovoitov
2022-03-03  7:33         ` Yonghong Song
2022-03-03  8:13           ` Kumar Kartikeya Dwivedi
2022-03-03 21:52           ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 9/9] selftests/bpf: Tests using sleepable tracepoints to monitor cgroup events Hao Luo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220302224506.jc7jwkdaatukicik@apollo.legion \
    --to=memxor@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=haoluo@google.com \
    --cc=jevburton.kernel@gmail.com \
    --cc=joshdon@google.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sdf@google.com \
    --cc=shakeelb@google.com \
    --cc=songliubraving@fb.com \
    --cc=tj@kernel.org \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.