* [PATCH RFC 1/4] cgroup, perf: Add ability to connect to perf cgroup from other cgroup controller
2021-11-18 20:28 [PATCH RFC 0/4] Add ability to attach bpf programs to a tracepoint inside a cgroup Kenny Ho
@ 2021-11-18 20:28 ` Kenny Ho
2021-11-18 20:28 ` [PATCH RFC 2/4] bpf, perf: add ability to attach complete array of bpf prog to perf event Kenny Ho
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Kenny Ho @ 2021-11-18 20:28 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Tejun Heo, Zefan Li, Johannes Weiner, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland,
Alexander Shishkin, Jiri Olsa, Namhyung Kim, Steven Rostedt,
netdev, bpf, linux-kernel, cgroups, linux-perf-users, y2kenny,
Kenny.Ho, amd-gfx
This provides the ability to allocate a cgroup-specific perf_event from
bpf-cgroup in a later patch.
Change-Id: I13aa7f3dfc2883ba3663c0b94744a6169504bbd8
Signed-off-by: Kenny Ho <Kenny.Ho@amd.com>
---
include/linux/cgroup.h | 2 ++
include/linux/perf_event.h | 2 ++
kernel/cgroup/cgroup.c | 4 ++--
kernel/events/core.c | 17 +++++++++++++++++
4 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 75c151413fda..1754e33cfe5e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -97,6 +97,8 @@ extern struct css_set init_css_set;
bool css_has_online_children(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
+struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgroup,
+ struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0cbc5dfe1110..9c440db65c18 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -926,6 +926,8 @@ struct perf_cgroup {
struct perf_cgroup_info __percpu *info;
};
+extern struct perf_cgroup *cgroup_tryget_perf_cgroup(struct cgroup *cgrp);
+
/*
* Must ensure cgroup is pinned (css_get) before calling
* this function. In other words, we cannot call this function
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 570b0c97392a..a645b212b69b 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -495,8 +495,8 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
* Find and get @cgrp's css associated with @ss. If the css doesn't exist
* or is offline, %NULL is returned.
*/
-static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
- struct cgroup_subsys *ss)
+struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
{
struct cgroup_subsys_state *css;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 20367196fa9a..d34e00749c9b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -941,6 +941,18 @@ static int perf_cgroup_ensure_storage(struct perf_event *event,
return ret;
}
+struct perf_cgroup *cgroup_tryget_perf_cgroup(struct cgroup *cgrp)
+{
+ struct cgroup_subsys_state *css;
+
+ css = cgroup_tryget_css(cgrp, &perf_event_cgrp_subsys);
+
+ if (!css)
+ return NULL;
+
+ return container_of(css, struct perf_cgroup, css);
+}
+
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
struct perf_event_attr *attr,
struct perf_event *group_leader)
@@ -1080,6 +1092,11 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
{
}
+struct perf_cgroup *cgroup_tryget_perf_cgroup(struct cgroup *cgrp)
+{
+ return NULL;
+}
+
static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
struct perf_event_attr *attr,
struct perf_event *group_leader)
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH RFC 3/4] bpf,cgroup,tracing: add new BPF_PROG_TYPE_CGROUP_TRACEPOINT
2021-11-18 20:28 [PATCH RFC 0/4] Add ability to attach bpf programs to a tracepoint inside a cgroup Kenny Ho
2021-11-18 20:28 ` [PATCH RFC 1/4] cgroup, perf: Add ability to connect to perf cgroup from other cgroup controller Kenny Ho
2021-11-18 20:28 ` [PATCH RFC 2/4] bpf, perf: add ability to attach complete array of bpf prog to perf event Kenny Ho
@ 2021-11-18 20:28 ` Kenny Ho
2021-11-18 20:28 ` [PATCH RFC 4/4] bpf,cgroup,perf: extend bpf-cgroup to support tracepoint attachment Kenny Ho
3 siblings, 0 replies; 7+ messages in thread
From: Kenny Ho @ 2021-11-18 20:28 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Tejun Heo, Zefan Li, Johannes Weiner, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland,
Alexander Shishkin, Jiri Olsa, Namhyung Kim, Steven Rostedt,
netdev, bpf, linux-kernel, cgroups, linux-perf-users, y2kenny,
Kenny.Ho, amd-gfx
Change-Id: Ic9727186bb8c76c757e48635143b16e607f2299f
Signed-off-by: Kenny Ho <Kenny.Ho@amd.com>
---
include/linux/bpf-cgroup.h | 2 ++
include/linux/bpf_types.h | 4 ++++
include/uapi/linux/bpf.h | 2 ++
kernel/bpf/syscall.c | 4 ++++
kernel/trace/bpf_trace.c | 8 ++++++++
5 files changed, 20 insertions(+)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2746fd804216..a5e4d9b19470 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -48,6 +48,7 @@ enum cgroup_bpf_attach_type {
CGROUP_INET4_GETSOCKNAME,
CGROUP_INET6_GETSOCKNAME,
CGROUP_INET_SOCK_RELEASE,
+ CGROUP_TRACEPOINT,
MAX_CGROUP_BPF_ATTACH_TYPE
};
@@ -81,6 +82,7 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
+ CGROUP_ATYPE(CGROUP_TRACEPOINT);
default:
return CGROUP_BPF_ATTACH_TYPE_INVALID;
}
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..c108f498a35e 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -57,6 +57,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl,
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt,
struct bpf_sockopt, struct bpf_sockopt_kern)
#endif
+#if defined (CONFIG_BPF_EVENTS) && defined (CONFIG_CGROUP_BPF)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_TRACEPOINT, cg_tracepoint,
+ __u64, u64)
+#endif
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2,
__u32, u32)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6fc59d61937a..014ffaa3fc2a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -949,6 +949,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+ BPF_PROG_TYPE_CGROUP_TRACEPOINT,
};
enum bpf_attach_type {
@@ -994,6 +995,7 @@ enum bpf_attach_type {
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
BPF_PERF_EVENT,
+ BPF_CGROUP_TRACEPOINT,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e50c0bfdb7d..d77598fa4eb2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2149,6 +2149,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_LSM:
case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
case BPF_PROG_TYPE_EXT: /* extends any prog */
+ case BPF_PROG_TYPE_CGROUP_TRACEPOINT:
return true;
default:
return false;
@@ -3137,6 +3138,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_SK_LOOKUP;
case BPF_XDP:
return BPF_PROG_TYPE_XDP;
+ case BPF_CGROUP_TRACEPOINT:
+ return BPF_PROG_TYPE_CGROUP_TRACEPOINT;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -3189,6 +3192,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_CGROUP_TRACEPOINT:
case BPF_PROG_TYPE_SOCK_OPS:
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
break;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8addd10202c2..4ad864a4852a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1798,6 +1798,14 @@ const struct bpf_verifier_ops perf_event_verifier_ops = {
const struct bpf_prog_ops perf_event_prog_ops = {
};
+const struct bpf_verifier_ops cg_tracepoint_verifier_ops = {
+ .get_func_proto = tp_prog_func_proto,
+ .is_valid_access = tp_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops cg_tracepoint_prog_ops = {
+};
+
static DEFINE_MUTEX(bpf_event_mutex);
#define BPF_TRACE_MAX_PROGS 64
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH RFC 4/4] bpf,cgroup,perf: extend bpf-cgroup to support tracepoint attachment
2021-11-18 20:28 [PATCH RFC 0/4] Add ability to attach bpf programs to a tracepoint inside a cgroup Kenny Ho
` (2 preceding siblings ...)
2021-11-18 20:28 ` [PATCH RFC 3/4] bpf,cgroup,tracing: add new BPF_PROG_TYPE_CGROUP_TRACEPOINT Kenny Ho
@ 2021-11-18 20:28 ` Kenny Ho
2021-11-19 4:33 ` Alexei Starovoitov
3 siblings, 1 reply; 7+ messages in thread
From: Kenny Ho @ 2021-11-18 20:28 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Tejun Heo, Zefan Li, Johannes Weiner, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland,
Alexander Shishkin, Jiri Olsa, Namhyung Kim, Steven Rostedt,
netdev, bpf, linux-kernel, cgroups, linux-perf-users, y2kenny,
Kenny.Ho, amd-gfx
bpf progs are attached to cgroups as usual, with the idea being that the
effective progs remain the same. The perf event / tracepoint's fd is
defined as the attachment 'subtype'. The 'subtype' is passed along during
attachment via bpf_attr, reusing the replace_bpf_fd field.
After the effective progs are calculated, a perf_event is allocated using
the 'subtype'/'fd' value for every CPU, filtering on the perf cgroup that
corresponds to the bpf-cgroup (with the assumption of a unified hierarchy).
The effective bpf prog array is then attached to each newly allocated
perf_event and subsequently enabled by activate_effective_progs.
Change-Id: I07a4dcaa0a682bafa496f05411365100d6c84fff
Signed-off-by: Kenny Ho <Kenny.Ho@amd.com>
---
include/linux/bpf-cgroup.h | 15 ++++--
include/linux/perf_event.h | 4 ++
kernel/bpf/cgroup.c | 96 +++++++++++++++++++++++++++++++-------
kernel/cgroup/cgroup.c | 9 ++--
kernel/events/core.c | 45 ++++++++++++++++++
5 files changed, 142 insertions(+), 27 deletions(-)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a5e4d9b19470..b6e22fd2aa6e 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -154,6 +154,11 @@ struct cgroup_bpf {
/* cgroup_bpf is released using a work queue */
struct work_struct release_work;
+
+ /* list of perf events (per child cgroups) for tracepoint/kprobe/uprobe bpf attachment to cgroup */
+ /* TODO: array of tp type with array of events for each cgroup
+ * currently only one tp type supported at a time */
+ struct list_head per_cg_events;
};
int cgroup_bpf_inherit(struct cgroup *cgrp);
@@ -161,21 +166,21 @@ void cgroup_bpf_offline(struct cgroup *cgrp);
int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
- struct bpf_cgroup_link *link,
+ struct bpf_cgroup_link *link, int bpf_attach_subtype,
enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link,
- enum bpf_attach_type type);
+ enum bpf_attach_type type, int bpf_attach_subtype);
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr);
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
- struct bpf_cgroup_link *link, enum bpf_attach_type type,
- u32 flags);
+ struct bpf_cgroup_link *link, int bpf_attach_subtype,
+ enum bpf_attach_type type, u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type);
+ enum bpf_attach_type type, int bpf_attach_subtype);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9c440db65c18..5a149d8865a1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -776,6 +776,7 @@ struct perf_event {
#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp; /* cgroup event is attach to */
+ struct list_head bpf_cg_list;
#endif
#ifdef CONFIG_SECURITY
@@ -982,6 +983,9 @@ extern void perf_pmu_resched(struct pmu *pmu);
extern int perf_event_refresh(struct perf_event *event, int refresh);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
+extern int perf_event_create_for_all_cpus(struct perf_event_attr *attr,
+ struct cgroup *cgroup,
+ struct list_head *entries);
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
int cpu,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 03145d45e3d5..0ecf465ddfb2 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -14,6 +14,8 @@
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
+#include <linux/perf_event.h>
+#include <linux/trace_events.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>
@@ -112,6 +114,8 @@ static void cgroup_bpf_release(struct work_struct *work)
struct bpf_prog_array *old_array;
struct list_head *storages = &cgrp->bpf.storages;
struct bpf_cgroup_storage *storage, *stmp;
+ struct list_head *events = &cgrp->bpf.per_cg_events;
+ struct perf_event *event, *etmp;
unsigned int atype;
@@ -141,6 +145,10 @@ static void cgroup_bpf_release(struct work_struct *work)
bpf_cgroup_storage_free(storage);
}
+ list_for_each_entry_safe(event, etmp, events, bpf_cg_list) {
+ perf_event_release_kernel(event);
+ }
+
mutex_unlock(&cgroup_mutex);
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
@@ -226,13 +234,16 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
*/
static int compute_effective_progs(struct cgroup *cgrp,
enum cgroup_bpf_attach_type atype,
+ int bpf_attach_subtype,
struct bpf_prog_array **array)
{
struct bpf_prog_array_item *item;
struct bpf_prog_array *progs;
struct bpf_prog_list *pl;
struct cgroup *p = cgrp;
- int cnt = 0;
+ struct perf_event *event, *etmp;
+ struct perf_event_attr attr = {};
+ int rc, cnt = 0;
/* count number of effective programs by walking parents */
do {
@@ -245,6 +256,21 @@ static int compute_effective_progs(struct cgroup *cgrp,
if (!progs)
return -ENOMEM;
+ if (atype == CGROUP_TRACEPOINT) {
+ /* TODO: only create event for cgroup that can have process */
+
+ attr.config = bpf_attach_subtype;
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ rc = perf_event_create_for_all_cpus(&attr, cgrp,
+ &cgrp->bpf.per_cg_events);
+ if (rc)
+ goto err;
+ }
+
/* populate the array with effective progs */
cnt = 0;
p = cgrp;
@@ -264,20 +290,41 @@ static int compute_effective_progs(struct cgroup *cgrp,
}
} while ((p = cgroup_parent(p)));
+ if (atype == CGROUP_TRACEPOINT) {
+ list_for_each_entry_safe(event, etmp, &cgrp->bpf.per_cg_events, bpf_cg_list) {
+ rc = perf_event_attach_bpf_prog_array(event, progs);
+ if (rc)
+ goto err_attach;
+ }
+ }
+
*array = progs;
return 0;
+err_attach:
+ list_for_each_entry_safe(event, etmp, &cgrp->bpf.per_cg_events, bpf_cg_list)
+ perf_event_release_kernel(event);
+err:
+ bpf_prog_array_free(progs);
+ return rc;
}
static void activate_effective_progs(struct cgroup *cgrp,
enum cgroup_bpf_attach_type atype,
struct bpf_prog_array *old_array)
{
- old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
- lockdep_is_held(&cgroup_mutex));
- /* free prog array after grace period, since __cgroup_bpf_run_*()
- * might be still walking the array
- */
- bpf_prog_array_free(old_array);
+ struct perf_event *event, *etmp;
+
+ if (atype == CGROUP_TRACEPOINT)
+ list_for_each_entry_safe(event, etmp, &cgrp->bpf.per_cg_events, bpf_cg_list)
+ perf_event_enable(event);
+ else {
+ old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
+ lockdep_is_held(&cgroup_mutex));
+ /* free prog array after grace period, since __cgroup_bpf_run_*()
+ * might be still walking the array
+ */
+ bpf_prog_array_free(old_array);
+ }
}
/**
@@ -306,9 +353,10 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
INIT_LIST_HEAD(&cgrp->bpf.storages);
+ INIT_LIST_HEAD(&cgrp->bpf.per_cg_events);
for (i = 0; i < NR; i++)
- if (compute_effective_progs(cgrp, i, &arrays[i]))
+ if (compute_effective_progs(cgrp, i, -1, &arrays[i]))
goto cleanup;
for (i = 0; i < NR; i++)
@@ -328,7 +376,8 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
}
static int update_effective_progs(struct cgroup *cgrp,
- enum cgroup_bpf_attach_type atype)
+ enum cgroup_bpf_attach_type atype,
+ int bpf_attach_subtype)
{
struct cgroup_subsys_state *css;
int err;
@@ -340,7 +389,8 @@ static int update_effective_progs(struct cgroup *cgrp,
if (percpu_ref_is_zero(&desc->bpf.refcnt))
continue;
- err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
+ err = compute_effective_progs(desc, atype, bpf_attach_subtype,
+ &desc->bpf.inactive);
if (err)
goto cleanup;
}
@@ -424,6 +474,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
* @prog: A program to attach
* @link: A link to attach
* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
+ * @bpf_attach_subtype: Type ID of perf tracing event for tracepoint/kprobe/uprobe
* @type: Type of attach operation
* @flags: Option flags
*
@@ -432,7 +483,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
*/
int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
- struct bpf_cgroup_link *link,
+ struct bpf_cgroup_link *link, int bpf_attach_subtype,
enum bpf_attach_type type, u32 flags)
{
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
@@ -454,6 +505,14 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
if (!!replace_prog != !!(flags & BPF_F_REPLACE))
/* replace_prog implies BPF_F_REPLACE, and vice versa */
return -EINVAL;
+ if ((type == BPF_CGROUP_TRACEPOINT) &&
+ ((flags & BPF_F_REPLACE) || (bpf_attach_subtype < 0) || !(flags & BPF_F_ALLOW_MULTI)))
+ /* replace fd is used to pass the subtype */
+ /* subtype is required for BPF_CGROUP_TRACEPOINT */
+ /* not allow multi BPF progs for the attach type for now */
+ return -EINVAL;
+
+ /* TODO check bpf_attach_subtype is valid */
atype = to_cgroup_bpf_attach_type(type);
if (atype < 0)
@@ -499,7 +558,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
bpf_cgroup_storages_assign(pl->storage, storage);
cgrp->bpf.flags[atype] = saved_flags;
- err = update_effective_progs(cgrp, atype);
+ err = update_effective_progs(cgrp, atype, bpf_attach_subtype);
if (err)
goto cleanup;
@@ -679,7 +738,8 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
* Must be called with cgroup_mutex held.
*/
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
- struct bpf_cgroup_link *link, enum bpf_attach_type type)
+ struct bpf_cgroup_link *link, enum bpf_attach_type type,
+ int bpf_attach_subtype)
{
enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
@@ -708,7 +768,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
pl->link = NULL;
- err = update_effective_progs(cgrp, atype);
+ err = update_effective_progs(cgrp, atype, bpf_attach_subtype);
if (err)
goto cleanup;
@@ -809,7 +869,7 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
}
}
- ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
+ ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL, attr->replace_bpf_fd,
attr->attach_type, attr->attach_flags);
if (replace_prog)
@@ -832,7 +892,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
if (IS_ERR(prog))
prog = NULL;
- ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
+ ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, attr->replace_bpf_fd);
if (prog)
bpf_prog_put(prog);
@@ -861,7 +921,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
}
WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
- cg_link->type));
+ cg_link->type, -1));
cg = cg_link->cgroup;
cg_link->cgroup = NULL;
@@ -961,7 +1021,7 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
goto out_put_cgroup;
}
- err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
+ err = cgroup_bpf_attach(cgrp, NULL, NULL, link, -1,
link->type, BPF_F_ALLOW_MULTI);
if (err) {
bpf_link_cleanup(&link_primer);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a645b212b69b..17a1269dc2f9 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6626,25 +6626,26 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
#ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
- struct bpf_cgroup_link *link,
+ struct bpf_cgroup_link *link, int bpf_attach_subtype,
enum bpf_attach_type type,
u32 flags)
{
int ret;
mutex_lock(&cgroup_mutex);
- ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
+ ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link,
+ bpf_attach_subtype, type, flags);
mutex_unlock(&cgroup_mutex);
return ret;
}
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type)
+ enum bpf_attach_type type, int bpf_attach_subtype)
{
int ret;
mutex_lock(&cgroup_mutex);
- ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
+ ret = __cgroup_bpf_detach(cgrp, prog, NULL, type, bpf_attach_subtype);
mutex_unlock(&cgroup_mutex);
return ret;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d34e00749c9b..71056af4322b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -12511,6 +12511,51 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
}
EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+int perf_event_create_for_all_cpus(struct perf_event_attr *attr,
+ struct cgroup *cgroup,
+ struct list_head *entries)
+{
+ struct perf_event **events;
+ struct perf_cgroup *perf_cgrp;
+ int cpu, i = 0;
+
+ events = kzalloc(sizeof(struct perf_event *) * num_possible_cpus(),
+ GFP_KERNEL);
+
+ if (!events)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ /* allocate first, connect the cgroup later */
+ events[i] = perf_event_create_kernel_counter(attr, cpu, NULL, NULL, NULL);
+
+ if (IS_ERR(events[i]))
+ goto err;
+
+ i++;
+ }
+
+ perf_cgrp = cgroup_tryget_perf_cgroup(cgroup);
+ if (!perf_cgrp)
+ goto err;
+
+ for (i--; i >= 0; i--) {
+ events[i]->cgrp = perf_cgrp;
+
+ list_add(&events[i]->bpf_cg_list, entries);
+ }
+
+ kfree(events);
+ return 0;
+
+err:
+ for (i--; i >= 0; i--)
+ free_event(events[i]);
+
+ kfree(events);
+ return -ENOMEM;
+}
+
void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
{
struct perf_event_context *src_ctx;
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread