* [PATCH bpf-next v4] bpf: fix NULL pointer dereference in bpf_get_local_storage() helper
@ 2021-03-23 5:51 Yonghong Song
2021-03-26 1:38 ` Alexei Starovoitov
0 siblings, 1 reply; 3+ messages in thread
From: Yonghong Song @ 2021-03-23 5:51 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Jiri Olsa,
Roman Gushchin
Jiri Olsa reported a bug ([1]) in kernel where cgroup local
storage pointer may be NULL in bpf_get_local_storage() helper.
There are two issues uncovered by this bug:
(1). kprobe or tracepoint prog incorrectly sets cgroup local storage
before prog run,
(2). due to change from preempt_disable to migrate_disable,
preemption is possible and percpu storage might be overwritten
by other tasks.
This issue (1) is fixed in [2]. This patch tried to address issue (2).
The following shows how things can go wrong:
task 1: bpf_cgroup_storage_set() for percpu local storage
preemption happens
task 2: bpf_cgroup_storage_set() for percpu local storage
preemption happens
task 1: run bpf program
task 1 will effectively use the percpu local storage setting by task 2
which will be either NULL or incorrect ones.
Instead of just one common local storage per cpu, this patch fixed
the issue by permitting 8 local storages per cpu and each local
storage is identified by a task_struct pointer. This way, we
allow at most 8 nested preemption between bpf_cgroup_storage_set()
and bpf_cgroup_storage_unset(). The percpu local storage slot
is released (calling bpf_cgroup_storage_unset()) by the same task
after bpf program finished running.
bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
interface.
The patch is tested on top of [2] with reproducer in [1].
Without this patch, kernel will emit error in 2-3 minutes.
With this patch, after one hour, still no error.
[1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
[2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com
Cc: Jiri Olsa <jolsa@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
include/linux/bpf-cgroup.h | 57 ++++++++++++++++++++++++++++++++------
include/linux/bpf.h | 22 ++++++++++++---
kernel/bpf/helpers.c | 15 +++++++---
kernel/bpf/local_storage.c | 5 ++--
net/bpf/test_run.c | 6 +++-
5 files changed, 86 insertions(+), 19 deletions(-)
Changelogs:
v3 -> v4:
. fixed incorrect link [2] and added Roman's Ack.
v2 -> v3:
. merge two patches as bpf_test_run() will have compilation error
and may fail with other changes.
. rewrite bpf_cgroup_storage_set() to be more inline with kernel
coding style.
v1 -> v2:
. fix compilation issues when CONFIG_CGROUPS is off or
CONFIG_CGROUP_BPF is off.
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c42e02b4d84b..6a29fe11485d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;
+struct task_struct;
#ifdef CONFIG_CGROUP_BPF
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
- bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+#define BPF_CGROUP_STORAGE_NEST_MAX 8
+
+struct bpf_cgroup_storage_info {
+ struct task_struct *task;
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+};
+
+/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
+ * to use bpf cgroup storage simultaneously.
+ */
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+ bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
return BPF_CGROUP_STORAGE_SHARED;
}
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
- *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
+ *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
enum bpf_cgroup_storage_type stype;
+ int i, err = 0;
+
+ preempt_disable();
+ for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+ if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
+ continue;
+
+ this_cpu_write(bpf_cgroup_storage_info[i].task, current);
+ for_each_cgroup_storage_type(stype)
+ this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
+ storage[stype]);
+ goto out;
+ }
+ err = -EBUSY;
+ WARN_ON_ONCE(1);
+
+out:
+ preempt_enable();
+ return err;
+}
+
+static inline void bpf_cgroup_storage_unset(void)
+{
+ int i;
+
+ for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+ if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+ continue;
- for_each_cgroup_storage_type(stype)
- this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
+ this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
+ return;
+ }
}
struct bpf_cgroup_storage *
@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}
-static inline void bpf_cgroup_storage_set(
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
+static inline int bpf_cgroup_storage_set(
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
+static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a47285cd39c2..3a6ae69743ff 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1090,6 +1090,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0)
+/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
+ * if bpf_cgroup_storage_set() failed, the rest of programs
+ * will not execute. This should be a really rare scenario
+ * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
+ * preemptions all between bpf_cgroup_storage_set() and
+ * bpf_cgroup_storage_unset() on the same cpu.
+ */
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
({ \
struct bpf_prog_array_item *_item; \
@@ -1102,10 +1109,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
+ if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
+ break; \
func_ret = func(_prog, ctx); \
_ret &= (func_ret & 1); \
*(ret_flags) |= (func_ret >> 1); \
+ bpf_cgroup_storage_unset(); \
_item++; \
} \
rcu_read_unlock(); \
@@ -1126,9 +1135,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
goto _out; \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
- if (set_cg_storage) \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
- _ret &= func(_prog, ctx); \
+ if (!set_cg_storage) { \
+ _ret &= func(_prog, ctx); \
+ } else { \
+ if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
+ break; \
+ _ret &= func(_prog, ctx); \
+ bpf_cgroup_storage_unset(); \
+ } \
_item++; \
} \
_out: \
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 074800226327..f306611c4ddf 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
};
#ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
- bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+ bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
@@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
* verifier checks that its value is correct.
*/
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
- struct bpf_cgroup_storage *storage;
+ struct bpf_cgroup_storage *storage = NULL;
void *ptr;
+ int i;
- storage = this_cpu_read(bpf_cgroup_storage[stype]);
+ for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+ if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+ continue;
+
+ storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
+ break;
+ }
if (stype == BPF_CGROUP_STORAGE_SHARED)
ptr = &READ_ONCE(storage->buf)->data[0];
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 2d4f9ac12377..bd11db9774c3 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -9,10 +9,11 @@
#include <linux/slab.h>
#include <uapi/linux/btf.h>
-DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
-
#ifdef CONFIG_CGROUP_BPF
+DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
+ bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+
#include "../cgroup/cgroup-internal.h"
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0abdd67f44b1..4aabf71cd95d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -106,12 +106,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
bpf_test_timer_enter(&t);
do {
- bpf_cgroup_storage_set(storage);
+ ret = bpf_cgroup_storage_set(storage);
+ if (ret)
+ break;
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = BPF_PROG_RUN(prog, ctx);
+
+ bpf_cgroup_storage_unset();
} while (bpf_test_timer_continue(&t, repeat, &ret, time));
bpf_test_timer_leave(&t);
--
2.30.2
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH bpf-next v4] bpf: fix NULL pointer dereference in bpf_get_local_storage() helper
2021-03-23 5:51 [PATCH bpf-next v4] bpf: fix NULL pointer dereference in bpf_get_local_storage() helper Yonghong Song
@ 2021-03-26 1:38 ` Alexei Starovoitov
2021-03-26 11:50 ` Jiri Olsa
0 siblings, 1 reply; 3+ messages in thread
From: Alexei Starovoitov @ 2021-03-26 1:38 UTC (permalink / raw)
To: Yonghong Song, bpf
Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Jiri Olsa,
Roman Gushchin
On 3/22/21 10:51 PM, Yonghong Song wrote:
> Jiri Olsa reported a bug ([1]) in kernel where cgroup local
> storage pointer may be NULL in bpf_get_local_storage() helper.
> There are two issues uncovered by this bug:
> (1). kprobe or tracepoint prog incorrectly sets cgroup local storage
> before prog run,
> (2). due to change from preempt_disable to migrate_disable,
> preemption is possible and percpu storage might be overwritten
> by other tasks.
>
> This issue (1) is fixed in [2]. This patch tried to address issue (2).
> The following shows how things can go wrong:
> task 1: bpf_cgroup_storage_set() for percpu local storage
> preemption happens
> task 2: bpf_cgroup_storage_set() for percpu local storage
> preemption happens
> task 1: run bpf program
>
> task 1 will effectively use the percpu local storage setting by task 2
> which will be either NULL or incorrect ones.
>
> Instead of just one common local storage per cpu, this patch fixed
> the issue by permitting 8 local storages per cpu and each local
> storage is identified by a task_struct pointer. This way, we
> allow at most 8 nested preemption between bpf_cgroup_storage_set()
> and bpf_cgroup_storage_unset(). The percpu local storage slot
> is released (calling bpf_cgroup_storage_unset()) by the same task
> after bpf program finished running.
> bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
> interface.
>
> The patch is tested on top of [2] with reproducer in [1].
> Without this patch, kernel will emit error in 2-3 minutes.
> With this patch, after one hour, still no error.
>
> [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
> [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com
>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Acked-by: Roman Gushchin <guro@fb.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
Applied to bpf-next. Thanks
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH bpf-next v4] bpf: fix NULL pointer dereference in bpf_get_local_storage() helper
2021-03-26 1:38 ` Alexei Starovoitov
@ 2021-03-26 11:50 ` Jiri Olsa
0 siblings, 0 replies; 3+ messages in thread
From: Jiri Olsa @ 2021-03-26 11:50 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Yonghong Song, bpf, Alexei Starovoitov, Daniel Borkmann,
kernel-team, Jiri Olsa, Roman Gushchin
On Thu, Mar 25, 2021 at 06:38:54PM -0700, Alexei Starovoitov wrote:
> On 3/22/21 10:51 PM, Yonghong Song wrote:
> > Jiri Olsa reported a bug ([1]) in kernel where cgroup local
> > storage pointer may be NULL in bpf_get_local_storage() helper.
> > There are two issues uncovered by this bug:
> > (1). kprobe or tracepoint prog incorrectly sets cgroup local storage
> > before prog run,
> > (2). due to change from preempt_disable to migrate_disable,
> > preemption is possible and percpu storage might be overwritten
> > by other tasks.
> >
> > This issue (1) is fixed in [2]. This patch tried to address issue (2).
> > The following shows how things can go wrong:
> > task 1: bpf_cgroup_storage_set() for percpu local storage
> > preemption happens
> > task 2: bpf_cgroup_storage_set() for percpu local storage
> > preemption happens
> > task 1: run bpf program
> >
> > task 1 will effectively use the percpu local storage setting by task 2
> > which will be either NULL or incorrect ones.
> >
> > Instead of just one common local storage per cpu, this patch fixed
> > the issue by permitting 8 local storages per cpu and each local
> > storage is identified by a task_struct pointer. This way, we
> > allow at most 8 nested preemption between bpf_cgroup_storage_set()
> > and bpf_cgroup_storage_unset(). The percpu local storage slot
> > is released (calling bpf_cgroup_storage_unset()) by the same task
> > after bpf program finished running.
> > bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
> > interface.
> >
> > The patch is tested on top of [2] with reproducer in [1].
> > Without this patch, kernel will emit error in 2-3 minutes.
> > With this patch, after one hour, still no error.
> >
> > [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
> > [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com
> >
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > Acked-by: Roman Gushchin <guro@fb.com>
> > Signed-off-by: Yonghong Song <yhs@fb.com>
>
> Applied to bpf-next. Thanks
>
sorry for late reply, FWIW I dont see the issue anymore with the patch
thanks,
jirka
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-03-26 11:51 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-23 5:51 [PATCH bpf-next v4] bpf: fix NULL pointer dereference in bpf_get_local_storage() helper Yonghong Song
2021-03-26 1:38 ` Alexei Starovoitov
2021-03-26 11:50 ` Jiri Olsa
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).