All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v3 1/2] bpf: Add bpf_current_task_in_cgroup helper
@ 2016-08-11 18:51 Sargun Dhillon
  2016-08-11 20:24 ` Daniel Borkmann
  2016-08-11 21:47 ` Tejun Heo
  0 siblings, 2 replies; 3+ messages in thread
From: Sargun Dhillon @ 2016-08-11 18:51 UTC (permalink / raw)
  To: netdev; +Cc: alexei.starovoitov, daniel, tj

This adds a bpf helper that's similar to the skb_in_cgroup helper to check
whether the probe is currently executing in the context of a specific
subset of the cgroupsv2 hierarchy. It does this based on membership test
for a cgroup arraymap. It is invalid to call this in an interrupt, and
it'll return an error. The helper is primarily to be used in debugging
activities for containers, where you may have multiple programs running in
a given top-level "container".

This patch also genericizes some of the arraymap fetching logic between the
skb_in_cgroup helper and this new helper.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Tejun Heo <tj@kernel.org>
---
 include/linux/bpf.h      | 22 ++++++++++++++++++++++
 include/linux/cgroup.h   | 23 +++++++++++++++++++++++
 include/uapi/linux/bpf.h | 11 +++++++++++
 kernel/bpf/arraymap.c    |  2 +-
 kernel/bpf/verifier.c    |  4 +++-
 kernel/trace/bpf_trace.c | 27 +++++++++++++++++++++++++++
 net/core/filter.c        | 11 ++++-------
 7 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1113423..6c01ab1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -319,4 +319,26 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+/* Helper to fetch a cgroup pointer based on index.
+ * @map: a cgroup arraymap
+ * @idx: index of the item you want to fetch
+ *
+ * Returns pointer on success,
+ * Error code if item not found, or out-of-bounds access
+ */
+static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
+{
+	struct cgroup *cgrp;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+	if (unlikely(idx >= array->map.max_entries))
+		return ERR_PTR(-E2BIG);
+
+	cgrp = READ_ONCE(array->ptrs[idx]);
+	if (unlikely(!cgrp))
+		return ERR_PTR(-EAGAIN);
+
+	return cgrp;
+}
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 984f73b..d4e173d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
 	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * task_in_cgroup_hierarchy - test task's membership of cgroup ancestry
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
+					    struct cgroup *ancestor)
+{
+	struct css_set *cset = task_css_set(task);
+
+	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
+}
+
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
@@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
+struct cgroup;
 
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
@@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {}
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 
+static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
+					    struct cgroup *ancestor)
+{
+	return false;
+}
 #endif /* !CONFIG_CGROUPS */
 
 /*
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fe..64b1a07 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,17 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_in_cgroup,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650..a2ac051 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
 				     struct file *map_file /* not used */,
 				     int fd)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7094c69..80efab8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_in_cgroup &&
+		    func_id != BPF_FUNC_current_task_in_cgroup)
 			goto error;
 		break;
 	default:
@@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
+	case BPF_FUNC_current_task_in_cgroup:
 	case BPF_FUNC_skb_in_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438f..57ea155 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,6 +376,31 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *)(long)r1;
+	struct cgroup *cgrp;
+	u32 idx = (u32)r2;
+
+	if (unlikely(in_interrupt()))
+		return -EINVAL;
+
+	cgrp = fetch_arraymap_ptr(map, idx);
+
+	if (unlikely(IS_ERR(cgrp)))
+		return PTR_ERR(cgrp);
+
+	return task_in_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_in_cgroup_proto = {
+	.func           = bpf_current_task_in_cgroup,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_CONST_MAP_PTR,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -407,6 +432,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
 		return bpf_get_probe_write_proto();
+	case BPF_FUNC_current_task_in_cgroup:
+		return &bpf_current_task_in_cgroup_proto;
 	default:
 		return NULL;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index b5add4e..7fda861 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2321,21 +2321,18 @@ static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long)r1;
 	struct bpf_map *map = (struct bpf_map *)(long)r2;
-	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct cgroup *cgrp;
 	struct sock *sk;
-	u32 i = (u32)r3;
+	u32 idx = (u32)r3;
 
 	sk = skb->sk;
 	if (!sk || !sk_fullsock(sk))
 		return -ENOENT;
 
-	if (unlikely(i >= array->map.max_entries))
-		return -E2BIG;
+	cgrp = fetch_arraymap_ptr(map, idx);
 
-	cgrp = READ_ONCE(array->ptrs[i]);
-	if (unlikely(!cgrp))
-		return -EAGAIN;
+	if (unlikely(IS_ERR(cgrp)))
+		return PTR_ERR(cgrp);
 
 	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next v3 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-11 18:51 [PATCH net-next v3 1/2] bpf: Add bpf_current_task_in_cgroup helper Sargun Dhillon
@ 2016-08-11 20:24 ` Daniel Borkmann
  2016-08-11 21:47 ` Tejun Heo
  1 sibling, 0 replies; 3+ messages in thread
From: Daniel Borkmann @ 2016-08-11 20:24 UTC (permalink / raw)
  To: Sargun Dhillon, netdev; +Cc: alexei.starovoitov, tj

Hi Sargun,

just some minor comment inline.

On 08/11/2016 08:51 PM, Sargun Dhillon wrote:
[...]
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 1113423..6c01ab1 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -319,4 +319,26 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
>   void bpf_user_rnd_init_once(void);
>   u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>
> +/* Helper to fetch a cgroup pointer based on index.
> + * @map: a cgroup arraymap
> + * @idx: index of the item you want to fetch
> + *
> + * Returns pointer on success,
> + * Error code if item not found, or out-of-bounds access
> + */
> +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> +{
> +	struct cgroup *cgrp;
> +	struct bpf_array *array = container_of(map, struct bpf_array, map);

Nit, please keep it in this reverse tree like order (whenever possible)
to be consistent with kernel coding style:

	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct cgroup *cgrp;

> +	if (unlikely(idx >= array->map.max_entries))
> +		return ERR_PTR(-E2BIG);
> +
> +	cgrp = READ_ONCE(array->ptrs[idx]);
> +	if (unlikely(!cgrp))
> +		return ERR_PTR(-EAGAIN);
> +
> +	return cgrp;
> +}

I think this inline helper doesn't buy us too much to be honest. First,
it really should be prefixed with bpf_* if this is exposed like this to
avoid potential naming clashes with other headers, but apart from that
a generic function to fetch an array map pointer returning a cgroup
is also not really generic. We have other specialized array maps also
fetching a pointer apart from cgroup, so either we make a real generic
helper for use in all of them, or just add the content of above into the
bpf_current_task_in_cgroup() helper directly. Probably just going for
the latter is more straight forward and likely also smaller diff.

Rest looks otherwise good to me, thanks!

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH net-next v3 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-11 18:51 [PATCH net-next v3 1/2] bpf: Add bpf_current_task_in_cgroup helper Sargun Dhillon
  2016-08-11 20:24 ` Daniel Borkmann
@ 2016-08-11 21:47 ` Tejun Heo
  1 sibling, 0 replies; 3+ messages in thread
From: Tejun Heo @ 2016-08-11 21:47 UTC (permalink / raw)
  To: Sargun Dhillon; +Cc: netdev, alexei.starovoitov, daniel

Hello, Sargun.

On Thu, Aug 11, 2016 at 11:51:42AM -0700, Sargun Dhillon wrote:
> This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> whether the probe is currently executing in the context of a specific
> subset of the cgroupsv2 hierarchy. It does this based on membership test
> for a cgroup arraymap. It is invalid to call this in an interrupt, and
> it'll return an error. The helper is primarily to be used in debugging
> activities for containers, where you may have multiple programs running in
> a given top-level "container".
> 
> This patch also genericizes some of the arraymap fetching logic between the
> skb_in_cgroup helper and this new helper.

I think it'd be better to put the changes into separate patches.

> @@ -319,4 +319,26 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
>  void bpf_user_rnd_init_once(void);
>  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>  
> +/* Helper to fetch a cgroup pointer based on index.

/**
 *

> + * @map: a cgroup arraymap
> + * @idx: index of the item you want to fetch
> + *
> + * Returns pointer on success,
> + * Error code if item not found, or out-of-bounds access
> + */
> +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
...
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 984f73b..d4e173d 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
>  	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
>  }
>  
> +/**
> + * task_in_cgroup_hierarchy - test task's membership of cgroup ancestry
> + * @task: the task to be tested
> + * @ancestor: possible ancestor of @task's cgroup
> + *
> + * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
> + * It follows all the same rules as cgroup_is_descendant, and only applies
> + * to the default hierarchy.
> + */
> +static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
> +					    struct cgroup *ancestor)

Maybe task_is_under_cgroup() is a better name?

> @@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {}
>  static inline int cgroup_init_early(void) { return 0; }
>  static inline int cgroup_init(void) { return 0; }
>  
> +static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
> +					    struct cgroup *ancestor)
> +{
> +	return false;
> +}

If cgroup is not enabled, all tasks are in the root cgroup, so the
test should always return true.

> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -375,6 +375,17 @@ enum bpf_func_id {
>  	 */
>  	BPF_FUNC_probe_write_user,
>  
> +	/**
> +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> +	 * @index: index of the cgroup in the bpf_map
> +	 * Return:
> +	 *   == 0 current failed the cgroup2 descendant test
> +	 *   == 1 current succeeded the cgroup2 descendant test
> +	 *    < 0 error
> +	 */
> +	BPF_FUNC_current_task_in_cgroup,

I think "under" would be better than "in" here.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-08-11 21:58 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-11 18:51 [PATCH net-next v3 1/2] bpf: Add bpf_current_task_in_cgroup helper Sargun Dhillon
2016-08-11 20:24 ` Daniel Borkmann
2016-08-11 21:47 ` Tejun Heo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.