From: Daniel Mack <daniel-cYrQPVfZoowdnm+yROfE0A@public.gmane.org>
To: htejun-b10kYP2dOMg@public.gmane.org,
daniel-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org,
ast-b10kYP2dOMg@public.gmane.org
Cc: davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org,
kafai-b10kYP2dOMg@public.gmane.org,
fw-HFFVJYpyMKqzQB+pC5nmwQ@public.gmane.org,
pablo-Cap9r6Oaw4JrovVCs/uTlw@public.gmane.org,
harald-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
sargun-GaZTRHToo+CzQB+pC5nmwQ@public.gmane.org,
cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Daniel Mack <daniel-cYrQPVfZoowdnm+yROfE0A@public.gmane.org>
Subject: [PATCH v7 2/6] cgroup: add support for eBPF programs
Date: Tue, 25 Oct 2016 12:14:10 +0200 [thread overview]
Message-ID: <1477390454-12553-3-git-send-email-daniel@zonque.org> (raw)
In-Reply-To: <1477390454-12553-1-git-send-email-daniel-cYrQPVfZoowdnm+yROfE0A@public.gmane.org>
This patch adds two sets of eBPF program pointers to struct cgroup.
One for such that are directly pinned to a cgroup, and one for such
that are effective for it.
To illustrate the logic behind that, assume the following example
cgroup hierarchy.
A - B - C
\ D - E
If only B has a program attached, it will be effective for B, C, D
and E. If D then attaches a program itself, that will be effective for
both D and E, and the program in B will only affect B and C. Only one
program of a given type is effective for a cgroup.
Attaching and detaching programs will be done through the bpf(2)
syscall. For now, ingress and egress inet socket filtering are the
only supported use-cases.
Signed-off-by: Daniel Mack <daniel-cYrQPVfZoowdnm+yROfE0A@public.gmane.org>
Acked-by: Alexei Starovoitov <ast-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
include/linux/bpf-cgroup.h | 71 +++++++++++++++++++
include/linux/cgroup-defs.h | 4 ++
init/Kconfig | 12 ++++
kernel/bpf/Makefile | 1 +
kernel/bpf/cgroup.c | 167 ++++++++++++++++++++++++++++++++++++++++++++
kernel/cgroup.c | 18 +++++
6 files changed, 273 insertions(+)
create mode 100644 include/linux/bpf-cgroup.h
create mode 100644 kernel/bpf/cgroup.c
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
new file mode 100644
index 0000000..fc076de
--- /dev/null
+++ b/include/linux/bpf-cgroup.h
@@ -0,0 +1,71 @@
+#ifndef _BPF_CGROUP_H
+#define _BPF_CGROUP_H
+
+#include <linux/bpf.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/bpf.h>
+
+struct sock;
+struct cgroup;
+struct sk_buff;
+
+#ifdef CONFIG_CGROUP_BPF
+
+extern struct static_key_false cgroup_bpf_enabled_key;
+#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+
+struct cgroup_bpf {
+ /*
+ * Store two sets of bpf_prog pointers, one for programs that are
+ * pinned directly to this cgroup, and one for those that are effective
+ * when this cgroup is accessed.
+ */
+ struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
+ struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+};
+
+void cgroup_bpf_put(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
+
+void __cgroup_bpf_update(struct cgroup *cgrp,
+ struct cgroup *parent,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type);
+
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+void cgroup_bpf_update(struct cgroup *cgrp,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type);
+
+int __cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type);
+
+/* Wrapper for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled */
+static inline int cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type)
+{
+ if (cgroup_bpf_enabled)
+ return __cgroup_bpf_run_filter(sk, skb, type);
+
+ return 0;
+}
+
+#else
+
+struct cgroup_bpf {};
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+ struct cgroup *parent) {}
+
+static inline int cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type)
+{
+ return 0;
+}
+
+#endif /* CONFIG_CGROUP_BPF */
+
+#endif /* _BPF_CGROUP_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5b17de6..861b467 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -16,6 +16,7 @@
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
+#include <linux/bpf-cgroup.h>
#ifdef CONFIG_CGROUPS
@@ -300,6 +301,9 @@ struct cgroup {
/* used to schedule release agent */
struct work_struct release_agent_work;
+ /* used to store eBPF programs */
+ struct cgroup_bpf bpf;
+
/* ids of the ancestors at each level including self */
int ancestor_ids[];
};
diff --git a/init/Kconfig b/init/Kconfig
index 34407f1..405120b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1154,6 +1154,18 @@ config CGROUP_PERF
Say N if unsure.
+config CGROUP_BPF
+ bool "Support for eBPF programs attached to cgroups"
+ depends on BPF_SYSCALL && SOCK_CGROUP_DATA
+ help
+ Allow attaching eBPF programs to a cgroup using the bpf(2)
+ syscall command BPF_PROG_ATTACH.
+
+ In which context these programs are accessed depends on the type
+ of attachment. For instance, programs that are attached using
+ BPF_CGROUP_INET_INGRESS will be executed on the ingress path of
+ inet sockets.
+
config CGROUP_DEBUG
bool "Example controller"
default n
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index eed911d..b22256b 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
+obj-$(CONFIG_CGROUP_BPF) += cgroup.o
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
new file mode 100644
index 0000000..a0ab43f
--- /dev/null
+++ b/kernel/bpf/cgroup.c
@@ -0,0 +1,167 @@
+/*
+ * Functions to manage eBPF programs attached to cgroups
+ *
+ * Copyright (c) 2016 Daniel Mack
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License. See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/bpf-cgroup.h>
+#include <net/sock.h>
+
+DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
+EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+
+/**
+ * cgroup_bpf_put() - put references of all bpf programs
+ * @cgrp: the cgroup to modify
+ */
+void cgroup_bpf_put(struct cgroup *cgrp)
+{
+ unsigned int type;
+
+ for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
+ struct bpf_prog *prog = cgrp->bpf.prog[type];
+
+ if (prog) {
+ bpf_prog_put(prog);
+ static_branch_dec(&cgroup_bpf_enabled_key);
+ }
+ }
+}
+
+/**
+ * cgroup_bpf_inherit() - inherit effective programs from parent
+ * @cgrp: the cgroup to modify
+ * @parent: the parent to inherit from
+ */
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
+{
+ unsigned int type;
+
+ for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
+ struct bpf_prog *e;
+
+ e = rcu_dereference_protected(parent->bpf.effective[type],
+ lockdep_is_held(&cgroup_mutex));
+ rcu_assign_pointer(cgrp->bpf.effective[type], e);
+ }
+}
+
+/**
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
+ * propagate the change to descendants
+ * @cgrp: The cgroup which descendants to traverse
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
+ *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and which is effective for execution.
+ *
+ * If @prog is %NULL, this function attaches a new program to the cgroup and
+ * releases the one that is currently attached, if any. @prog is then made
+ * the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is released,
+ * and the effective program of the parent cgroup (if any) is inherited to
+ * @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated subcgroups with own programs are left
+ * untouched.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+void __cgroup_bpf_update(struct cgroup *cgrp,
+ struct cgroup *parent,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type)
+{
+ struct bpf_prog *old_prog, *effective;
+ struct cgroup_subsys_state *pos;
+
+ old_prog = xchg(cgrp->bpf.prog + type, prog);
+
+ effective = (!prog && parent) ?
+ rcu_dereference_protected(parent->bpf.effective[type],
+ lockdep_is_held(&cgroup_mutex)) :
+ prog;
+
+ css_for_each_descendant_pre(pos, &cgrp->self) {
+ struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+ /* skip the subtree if the descendant has its own program */
+ if (desc->bpf.prog[type] && desc != cgrp)
+ pos = css_rightmost_descendant(pos);
+ else
+ rcu_assign_pointer(desc->bpf.effective[type],
+ effective);
+ }
+
+ if (prog)
+ static_branch_inc(&cgroup_bpf_enabled_key);
+
+ if (old_prog) {
+ bpf_prog_put(old_prog);
+ static_branch_dec(&cgroup_bpf_enabled_key);
+ }
+}
+
+/**
+ * __cgroup_bpf_run_filter() - Run a program for packet filtering
+ * @sk: The socken sending or receiving traffic
+ * @skb: The skb that is being sent or received
+ * @type: The type of program to be exectuted
+ *
+ * If no socket is passed, or the socket is not of type INET or INET6,
+ * this function does nothing and returns 0.
+ *
+ * The program type passed in via @type must be suitable for network
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type)
+{
+ struct bpf_prog *prog;
+ struct cgroup *cgrp;
+ int ret = 0;
+
+ if (!sk || !sk_fullsock(sk))
+ return 0;
+
+ if (sk->sk_family != AF_INET &&
+ sk->sk_family != AF_INET6)
+ return 0;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+
+ rcu_read_lock();
+
+ prog = rcu_dereference(cgrp->bpf.effective[type]);
+ if (prog) {
+ unsigned int offset = skb->data - skb_network_header(skb);
+
+ __skb_push(skb, offset);
+ ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+ __skb_pull(skb, offset);
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 85bc9be..2ee9ec3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5074,6 +5074,8 @@ static void css_release_work_fn(struct work_struct *work)
if (cgrp->kn)
RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
NULL);
+
+ cgroup_bpf_put(cgrp);
}
mutex_unlock(&cgroup_mutex);
@@ -5281,6 +5283,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
if (!cgroup_on_dfl(cgrp))
cgrp->subtree_control = cgroup_control(cgrp);
+ if (parent)
+ cgroup_bpf_inherit(cgrp, parent);
+
cgroup_propagate_control(cgrp);
/* @cgrp doesn't have dir yet so the following will only create csses */
@@ -6495,6 +6500,19 @@ static __init int cgroup_namespaces_init(void)
}
subsys_initcall(cgroup_namespaces_init);
+#ifdef CONFIG_CGROUP_BPF
+void cgroup_bpf_update(struct cgroup *cgrp,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type)
+{
+ struct cgroup *parent = cgroup_parent(cgrp);
+
+ mutex_lock(&cgroup_mutex);
+ __cgroup_bpf_update(cgrp, parent, prog, type);
+ mutex_unlock(&cgroup_mutex);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *
debug_css_alloc(struct cgroup_subsys_state *parent_css)
--
2.7.4
next prev parent reply other threads:[~2016-10-25 10:14 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-25 10:14 [PATCH v7 0/6] Add eBPF hooks for cgroups Daniel Mack
2016-10-25 10:14 ` [PATCH v7 1/6] bpf: add new prog type for cgroup socket filtering Daniel Mack
2016-10-25 10:14 ` [PATCH v7 4/6] net: filter: run cgroup eBPF ingress programs Daniel Mack
[not found] ` <1477390454-12553-1-git-send-email-daniel-cYrQPVfZoowdnm+yROfE0A@public.gmane.org>
2016-10-25 10:14 ` Daniel Mack [this message]
2016-10-25 10:14 ` [PATCH v7 3/6] bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH commands Daniel Mack
2016-10-25 10:14 ` [PATCH v7 5/6] net: ipv4, ipv6: run cgroup eBPF egress programs Daniel Mack
2016-10-31 16:40 ` David Miller
[not found] ` <20161031.124003.1361406552151798940.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2016-11-02 1:17 ` Daniel Borkmann
2016-10-25 10:14 ` [PATCH v7 6/6] samples: bpf: add userspace example for attaching eBPF programs to cgroups Daniel Mack
2016-10-26 19:59 ` [PATCH v7 0/6] Add eBPF hooks for cgroups Pablo Neira Ayuso
2016-10-27 3:35 ` Alexei Starovoitov
[not found] ` <20161027033502.GA43960-+o4/htvd0TDFYCXBM6kdu7fOX0fSgVTm@public.gmane.org>
2016-10-28 11:28 ` Pablo Neira Ayuso
2016-10-28 15:00 ` David Ahern
2016-10-29 1:42 ` Alexei Starovoitov
2016-10-27 8:40 ` Daniel Mack
[not found] ` <c9683122-d770-355b-e275-7c446e6d1d0f-cYrQPVfZoowdnm+yROfE0A@public.gmane.org>
2016-10-28 11:53 ` Pablo Neira Ayuso
2016-10-28 12:07 ` Daniel Mack
2016-10-29 3:51 ` Lorenzo Colitti
[not found] ` <CAKD1Yr2aRDNUxX8onReZyURufphxGoSTek=Fjk3Wswq9WOVp4w-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-10-29 4:51 ` Alexei Starovoitov
[not found] ` <20161029045107.GA61294-+o4/htvd0TDFYCXBM6kdu7fOX0fSgVTm@public.gmane.org>
2016-10-29 4:59 ` Lorenzo Colitti
[not found] ` <CAKD1Yr2pMk52h7BdRwTvGwnP5+ONmr4ac6cyUBoZ9P+Kt-B8jw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-10-29 6:24 ` Alexei Starovoitov
2016-10-29 15:34 ` Lorenzo Colitti
2016-10-29 20:29 ` Daniel Borkmann
2016-11-01 15:25 ` Lorenzo Colitti
[not found] ` <CAKD1Yr02SCHvd-xZJL14d_Ta8Dk4evHZ60zytpU0h4r80FucwA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-11-01 15:38 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1477390454-12553-3-git-send-email-daniel@zonque.org \
--to=daniel-cyrqpvfzoowdnm+yrofe0a@public.gmane.org \
--cc=ast-b10kYP2dOMg@public.gmane.org \
--cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=daniel-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org \
--cc=davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org \
--cc=fw-HFFVJYpyMKqzQB+pC5nmwQ@public.gmane.org \
--cc=harald-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=htejun-b10kYP2dOMg@public.gmane.org \
--cc=kafai-b10kYP2dOMg@public.gmane.org \
--cc=netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=pablo-Cap9r6Oaw4JrovVCs/uTlw@public.gmane.org \
--cc=sargun-GaZTRHToo+CzQB+pC5nmwQ@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).