* [RFC bpf-next 1/4] bpf: cgroup_sock lsm flavor
2022-02-16 0:12 [RFC bpf-next 0/4] bpf: cgroup_sock lsm flavor Stanislav Fomichev
@ 2022-02-16 0:12 ` Stanislav Fomichev
2022-02-17 2:38 ` Alexei Starovoitov
2022-02-16 0:12 ` [RFC bpf-next 2/4] bpf: allow writing to sock->sk_priority from lsm progtype Stanislav Fomichev
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Stanislav Fomichev @ 2022-02-16 0:12 UTC (permalink / raw)
To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
Allow per-cgroup lsm attachment of a subset of hooks that operate
on the 'struct sock'
Expected usage:
1. attach raw tracepoint hook with expected_attach_type=BPF_LSM_CGROUP_SOCK
2. this creates an fmod_ret trampoline that invokes __cgroup_bpf_run_lsm_sock
3. __cgroup_bpf_run_lsm_sock relies on existing cgroup_bpf->effective
array which is extended to include new slots for lsm hooks
4. attach same program to the cgroup_fd
Current limitation:
- abusing x86 jit, not generic
- no proper error handling (detaching the tracepoint first will probably cause
problems)
- 2 hooks for now for demonstration purposes
- lsm specific, maybe can be extended to fentry/fexit/fmod_ret
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
arch/x86/net/bpf_jit_comp.c | 27 +++++++++++++++------
include/linux/bpf-cgroup-defs.h | 4 +++
include/linux/bpf.h | 2 ++
include/uapi/linux/bpf.h | 1 +
kernel/bpf/btf.c | 10 ++++++++
kernel/bpf/cgroup.c | 43 ++++++++++++++++++++++++++++++---
kernel/bpf/syscall.c | 6 ++++-
kernel/bpf/trampoline.c | 1 +
kernel/bpf/verifier.c | 1 +
tools/include/uapi/linux/bpf.h | 1 +
10 files changed, 84 insertions(+), 12 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c7db0fe4de2f..a5225648d091 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1742,6 +1742,8 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
-(stack_size - i * 8));
}
+extern int __cgroup_bpf_run_lsm_sock(u64 *, const struct bpf_prog *);
+
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_prog *p, int stack_size, bool save_ret)
{
@@ -1767,14 +1769,23 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
/* arg1: lea rdi, [rbp - stack_size] */
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
- /* arg2: progs[i]->insnsi for interpreter */
- if (!p->jited)
- emit_mov_imm64(&prog, BPF_REG_2,
- (long) p->insnsi >> 32,
- (u32) (long) p->insnsi);
- /* call JITed bpf program or interpreter */
- if (emit_call(&prog, p->bpf_func, prog))
- return -EINVAL;
+
+ if (p->expected_attach_type == BPF_LSM_CGROUP_SOCK) {
+ /* arg2: progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_2, (long) p >> 32, (u32) (long) p);
+ if (emit_call(&prog, __cgroup_bpf_run_lsm_sock, prog))
+ return -EINVAL;
+ } else {
+ /* arg2: progs[i]->insnsi for interpreter */
+ if (!p->jited)
+ emit_mov_imm64(&prog, BPF_REG_2,
+ (long) p->insnsi >> 32,
+ (u32) (long) p->insnsi);
+
+ /* call JITed bpf program or interpreter */
+ if (emit_call(&prog, p->bpf_func, prog))
+ return -EINVAL;
+ }
/*
* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 695d1224a71b..72498d2c2552 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -10,6 +10,8 @@
struct bpf_prog_array;
+#define CGROUP_LSM_SOCK_NUM 2
+
enum cgroup_bpf_attach_type {
CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
CGROUP_INET_INGRESS = 0,
@@ -35,6 +37,8 @@ enum cgroup_bpf_attach_type {
CGROUP_INET4_GETSOCKNAME,
CGROUP_INET6_GETSOCKNAME,
CGROUP_INET_SOCK_RELEASE,
+ CGROUP_LSM_SOCK_START,
+ CGROUP_LSM_SOCK_END = CGROUP_LSM_SOCK_START + CGROUP_LSM_SOCK_NUM,
MAX_CGROUP_BPF_ATTACH_TYPE
};
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2fc7e5c5ef41..ed215e4440da 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -975,6 +975,7 @@ struct bpf_prog_aux {
u64 load_time; /* ns since boottime */
u32 verified_insns;
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+ int cgroup_atype; /* enum cgroup_bpf_attach_type */
char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
void *security;
@@ -2367,6 +2368,7 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len);
struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
+int btf_id_set_index(const struct btf_id_set *set, u32 id);
#define MAX_BPRINTF_VARARGS 12
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index afe3d0d7f5f2..286e55a2a852 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -997,6 +997,7 @@ enum bpf_attach_type {
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
BPF_PERF_EVENT,
+ BPF_LSM_CGROUP_SOCK,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 11740b300de9..74cf158117b6 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4928,6 +4928,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (arg == nr_args) {
switch (prog->expected_attach_type) {
+ case BPF_LSM_CGROUP_SOCK:
case BPF_LSM_MAC:
case BPF_TRACE_FEXIT:
/* When LSM programs are attached to void LSM hooks
@@ -6338,6 +6339,15 @@ static int btf_id_cmp_func(const void *a, const void *b)
return *pa - *pb;
}
+int btf_id_set_index(const struct btf_id_set *set, u32 id)
+{
+ const u32 *p;
+ p = bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func);
+ if (!p)
+ return -1;
+ return p - set->ids;
+}
+
bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
{
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 098632fdbc45..503603667842 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
+#include <linux/btf_ids.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>
@@ -417,6 +418,11 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
return NULL;
}
+BTF_SET_START(lsm_cgroup_sock)
+BTF_ID(func, bpf_lsm_socket_post_create)
+BTF_ID(func, bpf_lsm_socket_bind)
+BTF_SET_END(lsm_cgroup_sock)
+
/**
* __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
* propagate the change to descendants
@@ -455,9 +461,24 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
/* replace_prog implies BPF_F_REPLACE, and vice versa */
return -EINVAL;
- atype = to_cgroup_bpf_attach_type(type);
- if (atype < 0)
- return -EINVAL;
+ if (prog->type == BPF_PROG_TYPE_LSM &&
+ prog->expected_attach_type == BPF_LSM_CGROUP_SOCK) {
+ int idx;
+
+ BUG_ON(lsm_cgroup_sock.cnt != CGROUP_LSM_SOCK_NUM);
+
+ idx = btf_id_set_index(&lsm_cgroup_sock, prog->aux->attach_btf_id);
+ if (idx < 0)
+ return -EINVAL;
+
+ atype = CGROUP_LSM_SOCK_START + idx;
+
+ prog->aux->cgroup_atype = atype;
+ } else {
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+ return -EINVAL;
+ }
progs = &cgrp->bpf.progs[atype];
@@ -1091,6 +1112,22 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
+int __cgroup_bpf_run_lsm_sock(u64 *regs, const struct bpf_prog *prog)
+{
+ struct socket *sock = (void *)regs[BPF_REG_0];
+ struct cgroup *cgrp;
+ struct sock *sk;
+
+ sk = sock->sk;
+ if (!sk)
+ return 0;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+
+ return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[prog->aux->cgroup_atype],
+ regs, bpf_prog_run, 0);
+}
+
/**
* __cgroup_bpf_run_filter_sk() - Run a program on a sock
* @sk: sock structure to manipulate
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35646db3d950..aacf17e3e3da 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2724,7 +2724,8 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
}
break;
case BPF_PROG_TYPE_LSM:
- if (prog->expected_attach_type != BPF_LSM_MAC) {
+ if (prog->expected_attach_type != BPF_LSM_MAC &&
+ prog->expected_attach_type != BPF_LSM_CGROUP_SOCK) {
err = -EINVAL;
goto out_put_prog;
}
@@ -3184,6 +3185,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_SK_LOOKUP;
case BPF_XDP:
return BPF_PROG_TYPE_XDP;
+ case BPF_LSM_CGROUP_SOCK:
+ return BPF_PROG_TYPE_LSM;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -3237,6 +3240,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_LSM:
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
break;
default:
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 7224691df2ec..58b92d6edf1d 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -406,6 +406,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT;
+ case BPF_LSM_CGROUP_SOCK:
case BPF_LSM_MAC:
if (!prog->aux->attach_func_proto->type)
/* The function returns void, we cannot modify its
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d7473fee247c..1563723759d9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14105,6 +14105,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
fallthrough;
case BPF_MODIFY_RETURN:
case BPF_LSM_MAC:
+ case BPF_LSM_CGROUP_SOCK:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
if (!btf_type_is_func(t)) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index afe3d0d7f5f2..286e55a2a852 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -997,6 +997,7 @@ enum bpf_attach_type {
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
BPF_PERF_EVENT,
+ BPF_LSM_CGROUP_SOCK,
__MAX_BPF_ATTACH_TYPE
};
--
2.35.1.265.g69c8d7142f-goog
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [RFC bpf-next 1/4] bpf: cgroup_sock lsm flavor
2022-02-16 0:12 ` [RFC bpf-next 1/4] " Stanislav Fomichev
@ 2022-02-17 2:38 ` Alexei Starovoitov
2022-02-17 16:21 ` sdf
0 siblings, 1 reply; 8+ messages in thread
From: Alexei Starovoitov @ 2022-02-17 2:38 UTC (permalink / raw)
To: Stanislav Fomichev; +Cc: netdev, bpf, ast, daniel, andrii
On Tue, Feb 15, 2022 at 04:12:38PM -0800, Stanislav Fomichev wrote:
> {
> @@ -1767,14 +1769,23 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
>
> /* arg1: lea rdi, [rbp - stack_size] */
> EMIT4(0x48, 0x8D, 0x7D, -stack_size);
> - /* arg2: progs[i]->insnsi for interpreter */
> - if (!p->jited)
> - emit_mov_imm64(&prog, BPF_REG_2,
> - (long) p->insnsi >> 32,
> - (u32) (long) p->insnsi);
> - /* call JITed bpf program or interpreter */
> - if (emit_call(&prog, p->bpf_func, prog))
> - return -EINVAL;
> +
> + if (p->expected_attach_type == BPF_LSM_CGROUP_SOCK) {
> + /* arg2: progs[i] */
> + emit_mov_imm64(&prog, BPF_REG_2, (long) p >> 32, (u32) (long) p);
> + if (emit_call(&prog, __cgroup_bpf_run_lsm_sock, prog))
> + return -EINVAL;
> + } else {
> + /* arg2: progs[i]->insnsi for interpreter */
> + if (!p->jited)
> + emit_mov_imm64(&prog, BPF_REG_2,
> + (long) p->insnsi >> 32,
> + (u32) (long) p->insnsi);
> +
> + /* call JITed bpf program or interpreter */
> + if (emit_call(&prog, p->bpf_func, prog))
> + return -EINVAL;
Overall I think it's a workable solution.
As far as mechanism I think it would be better
to allocate single dummy bpf_prog and use normal fmod_ret
registration mechanism instead of hacking arch trampoline bits.
Set dummy_bpf_prog->bpf_func = __cgroup_bpf_run_lsm_sock;
and keep as dummy_bpf_prog->jited = false;
From p->insnsi pointer in arg2 it's easy to go back to struct bpf_prog.
Such dummy prog might even be statically defined like dummy_bpf_prog.
Or allocated dynamically once.
It can be added as fmod_ret to multiple trampolines.
Just gut the func_model check.
As far as api the attach should probably be to a cgroup+lsm_hook pair.
link_create.target_fd will be cgroup_fd.
At prog load time attach_btf_id should probably be one
of existing bpf_lsm_* hooks.
Feels wrong to duplicate the whole set into lsm_cgroup_sock set.
It's fine to have prog->expected_attach_type == BPF_LSM_CGROUP_SOCK
to disambiguate. Will we probably only have two:
BPF_LSM_CGROUP_SOCK and BPF_LSM_CGROUP_TASK ?
> +int __cgroup_bpf_run_lsm_sock(u64 *regs, const struct bpf_prog *prog)
> +{
> + struct socket *sock = (void *)regs[BPF_REG_0];
> + struct cgroup *cgrp;
> + struct sock *sk;
> +
> + sk = sock->sk;
> + if (!sk)
> + return 0;
> +
> + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> +
> + return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[prog->aux->cgroup_atype],
> + regs, bpf_prog_run, 0);
> +}
Would it be fast enough?
We went through a bunch of optimization for normal cgroup and ended with:
if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) &&
cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS))
Here the trampoline code plus call into __cgroup_bpf_run_lsm_sock
will be there for all cgroups.
Since cgroup specific check will be inside BPF_PROG_RUN_ARRAY_CG.
I suspect it's ok, since the link_create will be for few specific lsm hooks
which are typically not in the fast path.
Unlike traditional cgroup hook like ingress that is hot.
For BPF_LSM_CGROUP_TASK it will take cgroup from current instead of sock, right?
Args access should magically work. 'regs' above should be fine for
all lsm hooks.
The typical prog:
+SEC("lsm_cgroup_sock/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+ int type, int protocol, int kern)
looks good too.
Feel natural.
I guess they can be sleepable too?
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC bpf-next 1/4] bpf: cgroup_sock lsm flavor
2022-02-17 2:38 ` Alexei Starovoitov
@ 2022-02-17 16:21 ` sdf
2022-02-17 16:58 ` Alexei Starovoitov
0 siblings, 1 reply; 8+ messages in thread
From: sdf @ 2022-02-17 16:21 UTC (permalink / raw)
To: Alexei Starovoitov; +Cc: netdev, bpf, ast, daniel, andrii
On 02/16, Alexei Starovoitov wrote:
> On Tue, Feb 15, 2022 at 04:12:38PM -0800, Stanislav Fomichev wrote:
> > {
> > @@ -1767,14 +1769,23 @@ static int invoke_bpf_prog(const struct
> btf_func_model *m, u8 **pprog,
> >
> > /* arg1: lea rdi, [rbp - stack_size] */
> > EMIT4(0x48, 0x8D, 0x7D, -stack_size);
> > - /* arg2: progs[i]->insnsi for interpreter */
> > - if (!p->jited)
> > - emit_mov_imm64(&prog, BPF_REG_2,
> > - (long) p->insnsi >> 32,
> > - (u32) (long) p->insnsi);
> > - /* call JITed bpf program or interpreter */
> > - if (emit_call(&prog, p->bpf_func, prog))
> > - return -EINVAL;
> > +
> > + if (p->expected_attach_type == BPF_LSM_CGROUP_SOCK) {
> > + /* arg2: progs[i] */
> > + emit_mov_imm64(&prog, BPF_REG_2, (long) p >> 32, (u32) (long) p);
> > + if (emit_call(&prog, __cgroup_bpf_run_lsm_sock, prog))
> > + return -EINVAL;
> > + } else {
> > + /* arg2: progs[i]->insnsi for interpreter */
> > + if (!p->jited)
> > + emit_mov_imm64(&prog, BPF_REG_2,
> > + (long) p->insnsi >> 32,
> > + (u32) (long) p->insnsi);
> > +
> > + /* call JITed bpf program or interpreter */
> > + if (emit_call(&prog, p->bpf_func, prog))
> > + return -EINVAL;
> Overall I think it's a workable solution.
> As far as mechanism I think it would be better
> to allocate single dummy bpf_prog and use normal fmod_ret
> registration mechanism instead of hacking arch trampoline bits.
> Set dummy_bpf_prog->bpf_func = __cgroup_bpf_run_lsm_sock;
> and keep as dummy_bpf_prog->jited = false;
> From p->insnsi pointer in arg2 it's easy to go back to struct bpf_prog.
> Such dummy prog might even be statically defined like dummy_bpf_prog.
> Or allocated dynamically once.
> It can be added as fmod_ret to multiple trampolines.
> Just gut the func_model check.
Oooh, that's much cleaner, thanks!
> As far as api the attach should probably be to a cgroup+lsm_hook pair.
> link_create.target_fd will be cgroup_fd.
> At prog load time attach_btf_id should probably be one
> of existing bpf_lsm_* hooks.
> Feels wrong to duplicate the whole set into lsm_cgroup_sock set.
lsm_cgroup_sock is there to further limit which particular lsm
hooks BPF_LSM_CGROUP_SOCK can use. I guess I can maybe look at
BTF's first argument to verify that it's 'struct socket'? Let
me try to see whether it's a good alternative..
> It's fine to have prog->expected_attach_type == BPF_LSM_CGROUP_SOCK
> to disambiguate. Will we probably only have two:
> BPF_LSM_CGROUP_SOCK and BPF_LSM_CGROUP_TASK ?
I hope so. Unless objects other than socket and task can have cgroup
association.
> > +int __cgroup_bpf_run_lsm_sock(u64 *regs, const struct bpf_prog *prog)
> > +{
> > + struct socket *sock = (void *)regs[BPF_REG_0];
> > + struct cgroup *cgrp;
> > + struct sock *sk;
> > +
> > + sk = sock->sk;
> > + if (!sk)
> > + return 0;
> > +
> > + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> > +
> > + return
> BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[prog->aux->cgroup_atype],
> > + regs, bpf_prog_run, 0);
> > +}
> Would it be fast enough?
> We went through a bunch of optimization for normal cgroup and ended with:
> if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) &&
> cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS))
> Here the trampoline code plus call into __cgroup_bpf_run_lsm_sock
> will be there for all cgroups.
> Since cgroup specific check will be inside BPF_PROG_RUN_ARRAY_CG.
> I suspect it's ok, since the link_create will be for few specific lsm
> hooks
> which are typically not in the fast path.
> Unlike traditional cgroup hook like ingress that is hot.
Right, cgroup_bpf_enabled() is not needed because lsm is by definition
off/unattached by default. Seems like we can add cgroup_bpf_sock_enabled()
to
__cgroup_bpf_run_lsm_sock.
> For BPF_LSM_CGROUP_TASK it will take cgroup from current instead of sock,
> right?
Right. Seems like the only difference is where we get the cgroup pointer
from: current vs sock->cgroup. Although, I'm a bit unsure whether to
allow hooks that are clearly sock-cgroup-based to use
BPF_LSM_CGROUP_TASK. For example, should we allow
BPF_LSM_CGROUP_TASK to attach to that socket_post_create? I'd prohibit that
at
least initially to avoid some subtle 'why sometimes my
programs trigger on the wrong cgroup' types of issues.
> Args access should magically work. 'regs' above should be fine for
> all lsm hooks.
> The typical prog:
> +SEC("lsm_cgroup_sock/socket_post_create")
> +int BPF_PROG(socket_post_create, struct socket *sock, int family,
> + int type, int protocol, int kern)
> looks good too.
> Feel natural.
> I guess they can be sleepable too?
Haven't gone into the sleepable world, but I don't see any reason why
there couldn't be a sleepable variation.
Thank you for a review!
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC bpf-next 1/4] bpf: cgroup_sock lsm flavor
2022-02-17 16:21 ` sdf
@ 2022-02-17 16:58 ` Alexei Starovoitov
0 siblings, 0 replies; 8+ messages in thread
From: Alexei Starovoitov @ 2022-02-17 16:58 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: Network Development, bpf, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko
On Thu, Feb 17, 2022 at 8:21 AM <sdf@google.com> wrote:
> > As far as api the attach should probably be to a cgroup+lsm_hook pair.
> > link_create.target_fd will be cgroup_fd.
> > At prog load time attach_btf_id should probably be one
> > of existing bpf_lsm_* hooks.
> > Feels wrong to duplicate the whole set into lsm_cgroup_sock set.
>
> lsm_cgroup_sock is there to further limit which particular lsm
> hooks BPF_LSM_CGROUP_SOCK can use. I guess I can maybe look at
> BTF's first argument to verify that it's 'struct socket'? Let
> me try to see whether it's a good alternative..
That's a great idea.
We probably would need 2 flavors of __cgroup_bpf_run_lsm_sock wrapper.
One for 'struct socket *' and another for 'struct sock *'.
In lsm hooks they come as the first argument and BTF will tell us what it is.
There are exceptions like socket_create hook
which would have to use current->cgroup.
So ideally we can have a single attach_type BPF_LSM_CGROUP
and use proper wrapper socket/sock/current depending on BTF of the lsm hook.
> > Would it be fast enough?
> > We went through a bunch of optimization for normal cgroup and ended with:
> > if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) &&
> > cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS))
> > Here the trampoline code plus call into __cgroup_bpf_run_lsm_sock
> > will be there for all cgroups.
> > Since cgroup specific check will be inside BPF_PROG_RUN_ARRAY_CG.
> > I suspect it's ok, since the link_create will be for few specific lsm
> > hooks
> > which are typically not in the fast path.
> > Unlike traditional cgroup hook like ingress that is hot.
>
> Right, cgroup_bpf_enabled() is not needed because lsm is by definition
> off/unattached by default. Seems like we can add cgroup_bpf_sock_enabled()
> to
> __cgroup_bpf_run_lsm_sock.
I guess we can, but that feels redundant.
If we attach the wrapper to a particular lsm hook then cgroup_bpf_sock
is surely enabled.
>
> > For BPF_LSM_CGROUP_TASK it will take cgroup from current instead of sock,
> > right?
>
> Right. Seems like the only difference is where we get the cgroup pointer
> from: current vs sock->cgroup. Although, I'm a bit unsure whether to
> allow hooks that are clearly sock-cgroup-based to use
> BPF_LSM_CGROUP_TASK. For example, should we allow
> BPF_LSM_CGROUP_TASK to attach to that socket_post_create? I'd prohibit that
> at
> least initially to avoid some subtle 'why sometimes my
> programs trigger on the wrong cgroup' types of issues.
Agree. With a single BPF_LSM_CGROUP attach type will get correct
behavior automatically.
Looking forward to non rfc patches. Exciting feature!
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC bpf-next 2/4] bpf: allow writing to sock->sk_priority from lsm progtype
2022-02-16 0:12 [RFC bpf-next 0/4] bpf: cgroup_sock lsm flavor Stanislav Fomichev
2022-02-16 0:12 ` [RFC bpf-next 1/4] " Stanislav Fomichev
@ 2022-02-16 0:12 ` Stanislav Fomichev
2022-02-16 0:12 ` [RFC bpf-next 3/4] libbpf: add lsm_cgroup_sock type Stanislav Fomichev
2022-02-16 0:12 ` [RFC bpf-next 4/4] selftest: lsm_cgroup_sock sample usage Stanislav Fomichev
3 siblings, 0 replies; 8+ messages in thread
From: Stanislav Fomichev @ 2022-02-16 0:12 UTC (permalink / raw)
To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
Open up enough fields for selftests. Will be extended in the real
patch series to match bpf_sock fields.
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
kernel/bpf/bpf_lsm.c | 49 +++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/verifier.c | 3 ++-
2 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 9e4ecc990647..1a68661e1b9c 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -222,7 +222,56 @@ bool bpf_lsm_is_sleepable_hook(u32 btf_id)
const struct bpf_prog_ops lsm_prog_ops = {
};
+static int lsm_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ const struct btf_type *sock_type;
+ struct btf *btf_vmlinux;
+ s32 type_id;
+ size_t end;
+
+ if (atype == BPF_READ)
+ return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
+
+ btf_vmlinux = bpf_get_btf_vmlinux();
+
+ type_id = btf_find_by_name_kind(btf_vmlinux, "sock", BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+
+ sock_type = btf_type_by_id(btf_vmlinux, type_id);
+
+ if (t != sock_type) {
+ bpf_log(log, "only 'struct sock' writes are supported\n");
+ return -EACCES;
+ }
+
+ switch (off) {
+ case bpf_ctx_range(struct sock, sk_priority):
+ end = offsetofend(struct sock, sk_priority);
+ break;
+ default:
+ bpf_log(log, "no write support to 'struct sock' at off %d\n", off);
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log,
+ "write access at off %d with size %d beyond the member of 'struct sock' ended at %zu\n",
+ off, size, end);
+ return -EACCES;
+ }
+
+ return NOT_INIT;
+}
+
const struct bpf_verifier_ops lsm_verifier_ops = {
.get_func_proto = bpf_lsm_func_proto,
.is_valid_access = btf_ctx_access,
+ .btf_struct_access = lsm_btf_struct_access,
};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1563723759d9..b8991460d17d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12801,7 +12801,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code = BPF_LDX | BPF_PROBE_MEM |
BPF_SIZE((insn)->code);
env->prog->aux->num_exentries++;
- } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
+ } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS &&
+ resolve_prog_type(env->prog) != BPF_PROG_TYPE_LSM) {
verbose(env, "Writes through BTF pointers are not allowed\n");
return -EINVAL;
}
--
2.35.1.265.g69c8d7142f-goog
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC bpf-next 3/4] libbpf: add lsm_cgroup_sock type
2022-02-16 0:12 [RFC bpf-next 0/4] bpf: cgroup_sock lsm flavor Stanislav Fomichev
2022-02-16 0:12 ` [RFC bpf-next 1/4] " Stanislav Fomichev
2022-02-16 0:12 ` [RFC bpf-next 2/4] bpf: allow writing to sock->sk_priority from lsm progtype Stanislav Fomichev
@ 2022-02-16 0:12 ` Stanislav Fomichev
2022-02-16 0:12 ` [RFC bpf-next 4/4] selftest: lsm_cgroup_sock sample usage Stanislav Fomichev
3 siblings, 0 replies; 8+ messages in thread
From: Stanislav Fomichev @ 2022-02-16 0:12 UTC (permalink / raw)
To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
lsm+expected_attach_type=BPF_LSM_CGROUP_SOCK
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
tools/lib/bpf/libbpf.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2262bcdfee92..840a0274240c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8615,6 +8615,7 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace),
SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
+ SEC_DEF("lsm_cgroup_sock/", LSM, BPF_LSM_CGROUP_SOCK, SEC_ATTACH_BTF, attach_lsm),
SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
@@ -8930,6 +8931,7 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
*kind = BTF_KIND_TYPEDEF;
break;
case BPF_LSM_MAC:
+ case BPF_LSM_CGROUP_SOCK:
*prefix = BTF_LSM_PREFIX;
*kind = BTF_KIND_FUNC;
break;
--
2.35.1.265.g69c8d7142f-goog
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC bpf-next 4/4] selftest: lsm_cgroup_sock sample usage
2022-02-16 0:12 [RFC bpf-next 0/4] bpf: cgroup_sock lsm flavor Stanislav Fomichev
` (2 preceding siblings ...)
2022-02-16 0:12 ` [RFC bpf-next 3/4] libbpf: add lsm_cgroup_sock type Stanislav Fomichev
@ 2022-02-16 0:12 ` Stanislav Fomichev
3 siblings, 0 replies; 8+ messages in thread
From: Stanislav Fomichev @ 2022-02-16 0:12 UTC (permalink / raw)
To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
Implement two things I'd like to get from this:
1. apply default sk_priority policy
2. permit TX-only AF_PACKET socket
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
.../bpf/prog_tests/lsm_cgroup_sock.c | 81 +++++++++++++++++++
.../selftests/bpf/progs/lsm_cgroup_sock.c | 55 +++++++++++++
2 files changed, 136 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/lsm_cgroup_sock.c
create mode 100644 tools/testing/selftests/bpf/progs/lsm_cgroup_sock.c
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup_sock.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup_sock.c
new file mode 100644
index 000000000000..4afc40282b15
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup_sock.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+
+#include "lsm_cgroup_sock.skel.h"
+
+void test_lsm_cgroup_sock(void)
+{
+ int post_create_prog_fd = -1, bind_prog_fd = -1;
+ struct lsm_cgroup_sock *skel = NULL;
+ int cgroup_fd, err, fd, prio;
+ socklen_t socklen;
+
+
+ cgroup_fd = test__join_cgroup("/sock_policy");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto close_skel;
+
+ skel = lsm_cgroup_sock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto close_cgroup;
+
+ err = lsm_cgroup_sock__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto close_cgroup;
+
+ post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+ bind_prog_fd = bpf_program__fd(skel->progs.socket_bind);
+
+ err = bpf_prog_attach(post_create_prog_fd, cgroup_fd, BPF_LSM_CGROUP_SOCK, 0);
+ if (!ASSERT_OK(err, "attach post_create_prog_fd"))
+ goto close_cgroup;
+
+ err = bpf_prog_attach(bind_prog_fd, cgroup_fd, BPF_LSM_CGROUP_SOCK, 0);
+ if (!ASSERT_OK(err, "attach bind_prog_fd"))
+ goto detach_cgroup;
+
+ /* AF_INET6 gets default policy (sk_priority).
+ */
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+ goto detach_cgroup;
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, "getsockopt");
+ ASSERT_EQ(prio, 123, "sk_priority");
+
+ close(fd);
+
+ /* TX-only AF_PACKET is allowed.
+ */
+
+ ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0, "socket(AF_PACKET, ..., ETH_P_ALL)");
+
+ fd = socket(AF_PACKET, SOCK_RAW, 0);
+ ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)");
+
+ /* TX-only AF_PACKET can not be rebound.
+ */
+
+ struct sockaddr_ll sa = {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htons(ETH_P_ALL),
+ };
+ ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0, "bind(ETH_P_ALL)");
+
+ close(fd);
+
+detach_cgroup:
+ bpf_prog_detach2(post_create_prog_fd, cgroup_fd, BPF_LSM_CGROUP_SOCK);
+ bpf_prog_detach2(bind_prog_fd, cgroup_fd, BPF_LSM_CGROUP_SOCK);
+
+close_cgroup:
+ close(cgroup_fd);
+close_skel:
+ lsm_cgroup_sock__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_sock.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_sock.c
new file mode 100644
index 000000000000..7fe3da5a8dc7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup_sock.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef AF_PACKET
+#define AF_PACKET 17
+#endif
+
+#ifndef EPERM
+#define EPERM 1
+#endif
+
+SEC("lsm_cgroup_sock/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ struct sock *sk;
+
+ /* Reject non-tx-only AF_PACKET.
+ */
+ if (family == AF_PACKET && protocol != 0)
+ return 0; /* EPERM */
+
+ sk = sock->sk;
+ if (!sk)
+ return 1;
+
+ /* The rest of the sockets get default policy.
+ */
+ sk->sk_priority = 123;
+ return 1;
+}
+
+SEC("lsm_cgroup_sock/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ struct sockaddr_ll sa = {};
+
+ if (sock->sk->__sk_common.skc_family != AF_PACKET)
+ return 1;
+
+ if (sock->sk->sk_kern_sock)
+ return 1;
+
+ bpf_probe_read_kernel(&sa, sizeof(sa), address);
+ if (sa.sll_protocol)
+ return 0; /* EPERM */
+
+ return 1;
+}
--
2.35.1.265.g69c8d7142f-goog
^ permalink raw reply related [flat|nested] 8+ messages in thread