From mboxrd@z Thu Jan  1 00:00:00 1970
From: Alexei Starovoitov <ast@kernel.org>
Subject: [PATCH v3 bpf-next 2/3] bpf: implement CAP_BPF
Date: Wed, 4 Sep 2019 11:43:34 -0700
Message-ID: <20190904184335.360074-2-ast@kernel.org>
In-Reply-To: <20190904184335.360074-1-ast@kernel.org>
References: <20190904184335.360074-1-ast@kernel.org>
X-Mailer: git-send-email 2.20.0
X-Mailing-List: bpf@vger.kernel.org

Implement permissions as stated in uapi/linux/capability.h

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
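Note for reviewers without patch 1/3 at hand: the capable_bpf(),
capable_tracing(), capable_bpf_tracing() and capable_bpf_net_admin()
helpers used throughout this diff are introduced there. The following is
only a sketch of their apparent semantics, inferred from how they are
used below and from the permission split described in
uapi/linux/capability.h; it is not the literal code from patch 1/3:

    /* Sketch only -- assumed semantics of the patch 1/3 helpers.
     * CAP_SYS_ADMIN is kept as a fallback so existing privileged
     * setups keep working.
     */
    static inline bool capable_bpf(void)
    {
            return capable(CAP_SYS_ADMIN) || capable(CAP_BPF);
    }

    static inline bool capable_tracing(void)
    {
            return capable(CAP_SYS_ADMIN) || capable(CAP_TRACING);
    }

    static inline bool capable_bpf_tracing(void)
    {
            return capable(CAP_SYS_ADMIN) ||
                   (capable(CAP_BPF) && capable(CAP_TRACING));
    }

    static inline bool capable_bpf_net_admin(void)
    {
            return capable(CAP_SYS_ADMIN) ||
                   (capable(CAP_BPF) && capable(CAP_NET_ADMIN));
    }

Under that assumption the conversions below are mechanical: plain
capable(CAP_SYS_ADMIN) checks in map/prog/BTF paths become capable_bpf(),
tracing-facing paths become capable_bpf_tracing(), and stackmap creation
(which exposes kernel addresses) requires capable_tracing().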
 kernel/bpf/arraymap.c                       |  2 +-
 kernel/bpf/cgroup.c                         |  2 +-
 kernel/bpf/core.c                           |  4 +-
 kernel/bpf/hashtab.c                        |  4 +-
 kernel/bpf/lpm_trie.c                       |  2 +-
 kernel/bpf/queue_stack_maps.c               |  2 +-
 kernel/bpf/reuseport_array.c                |  2 +-
 kernel/bpf/stackmap.c                       |  2 +-
 kernel/bpf/syscall.c                        | 32 ++++++++------
 kernel/bpf/verifier.c                       |  4 +-
 kernel/trace/bpf_trace.c                    |  2 +-
 net/core/bpf_sk_storage.c                   |  2 +-
 net/core/filter.c                           | 10 +++--
 tools/testing/selftests/bpf/test_verifier.c | 46 +++++++++++++++++----
 14 files changed, 77 insertions(+), 39 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c65ce0098a9..149f868a02dc 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -73,7 +73,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int ret, numa_node = bpf_map_attr_numa_node(attr);
 	u32 elem_size, index_mask, max_entries;
-	bool unpriv = !capable(CAP_SYS_ADMIN);
+	bool unpriv = !capable_bpf();
 	u64 cost, array_size, mask64;
 	struct bpf_map_memory mem;
 	struct bpf_array *array;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 6a6a154cfa7b..9c659ba5c146 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -795,7 +795,7 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
 	case BPF_FUNC_trace_printk:
-		if (capable(CAP_SYS_ADMIN))
+		if (capable_bpf_tracing())
 			return bpf_get_trace_printk_proto();
 		/* fall through */
 	default:
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8191a7db2777..6b53c064e8e6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -646,7 +646,7 @@ static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
 void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 {
 	if (!bpf_prog_kallsyms_candidate(fp) ||
-	    !capable(CAP_SYS_ADMIN))
+	    !capable_bpf())
 		return;
 
 	spin_lock_bh(&bpf_lock);
@@ -768,7 +768,7 @@ static int bpf_jit_charge_modmem(u32 pages)
 {
 	if (atomic_long_add_return(pages, &bpf_jit_current) >
 	    (bpf_jit_limit >> PAGE_SHIFT)) {
-		if (!capable(CAP_SYS_ADMIN)) {
+		if (!capable_bpf()) {
 			atomic_long_sub(pages, &bpf_jit_current);
 			return -EPERM;
 		}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 22066a62c8c9..0fae5c45f425 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -244,9 +244,9 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
 		     offsetof(struct htab_elem, hash_node.pprev));
 
-	if (lru && !capable(CAP_SYS_ADMIN))
+	if (lru && !capable_bpf())
 		/* LRU implementation is much complicated than other
-		 * maps. Hence, limit to CAP_SYS_ADMIN for now.
+		 * maps. Hence, limit to CAP_BPF.
 		 */
 		return -EPERM;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 56e6c75d354d..11da3be8a4e5 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -543,7 +543,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	u64 cost = sizeof(*trie), cost_per_node;
 	int ret;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return ERR_PTR(-EPERM);
 
 	/* check sanity of attributes */
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index f697647ceb54..d83afac32863 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -45,7 +45,7 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
 /* Called from syscall */
 static int queue_stack_map_alloc_check(union bpf_attr *attr)
 {
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return -EPERM;
 
 	/* check sanity of attributes */
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 50c083ba978c..b268fe4b2972 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -154,7 +154,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
 	struct bpf_map_memory mem;
 	u64 array_size;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return ERR_PTR(-EPERM);
 
 	array_size = sizeof(*array);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 052580c33d26..477063c63b27 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -90,7 +90,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	u64 cost, n_buckets;
 	int err;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_tracing())
 		return ERR_PTR(-EPERM);
 
 	if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ca60eafa6922..2b832eeafda9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1176,7 +1176,7 @@ static int map_freeze(const union bpf_attr *attr)
 		err = -EBUSY;
 		goto err_put;
 	}
-	if (!capable(CAP_SYS_ADMIN)) {
+	if (!capable_bpf()) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -1635,7 +1635,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
 	    (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable_bpf())
 		return -EPERM;
 
 	/* copy eBPF program license from user space */
@@ -1648,11 +1648,11 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	is_gpl = license_is_gpl_compatible(license);
 
 	if (attr->insn_cnt == 0 ||
-	    attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
+	    attr->insn_cnt > (capable_bpf() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
 		return -E2BIG;
 	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
 	    type != BPF_PROG_TYPE_CGROUP_SKB &&
-	    !capable(CAP_SYS_ADMIN))
+	    !capable_bpf())
 		return -EPERM;
 
 	bpf_prog_load_fixup_attach_type(attr);
@@ -1803,6 +1803,9 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 	char tp_name[128];
 	int tp_fd, err;
 
+	if (!capable_bpf_tracing())
+		return -EPERM;
+
 	if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
 			      sizeof(tp_name) - 1) < 0)
 		return -EFAULT;
@@ -2081,7 +2084,10 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
 	struct bpf_prog *prog;
 	int ret = -ENOTSUPP;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf_net_admin())
+		/* test_run callback is available for networking progs only.
+		 * Add capable_bpf_tracing() above when tracing progs become runnable.
+		 */
 		return -EPERM;
 	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
 		return -EINVAL;
@@ -2118,7 +2124,7 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
 
 	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return -EPERM;
 
 	next_id++;
@@ -2144,7 +2150,7 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
 
 	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return -EPERM;
 
 	spin_lock_bh(&prog_idr_lock);
@@ -2178,7 +2184,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
 	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return -EPERM;
 
 	f_flags = bpf_get_file_flag(attr->open_flags);
@@ -2353,7 +2359,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	info.run_time_ns = stats.nsecs;
 	info.run_cnt = stats.cnt;
 
-	if (!capable(CAP_SYS_ADMIN)) {
+	if (!capable_bpf()) {
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
 		info.nr_jited_ksyms = 0;
@@ -2671,7 +2677,7 @@ static int bpf_btf_load(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_BTF_LOAD))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return -EPERM;
 
 	return btf_new_fd(attr);
@@ -2684,7 +2690,7 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return -EPERM;
 
 	return btf_get_fd_by_id(attr->btf_id);
@@ -2753,7 +2759,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf_tracing())
 		return -EPERM;
 
 	if (attr->task_fd_query.flags != 0)
@@ -2821,7 +2827,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	union bpf_attr attr = {};
 	int err;
 
-	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
+	if (sysctl_unprivileged_bpf_disabled && !capable_bpf())
 		return -EPERM;
 
 	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f340cfe53c6e..b2a229557602 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -987,7 +987,7 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 	reg->umax_value = U64_MAX;
 
 	/* constant backtracking is enabled for root only for now */
-	reg->precise = capable(CAP_SYS_ADMIN) ? false : true;
+	reg->precise = capable_bpf() ? false : true;
 }
 
 /* Mark a register as having a completely unknown (scalar) value.
  */
@@ -9233,7 +9233,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
-	is_priv = capable(CAP_SYS_ADMIN);
+	is_priv = capable_bpf();
 
 	/* grab the mutex to protect few globals used by verifier */
 	if (!is_priv)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1255d14576..cdf8d6c8a430 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1246,7 +1246,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
 	u32 *ids, prog_cnt, ids_len;
 	int ret;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf_tracing())
 		return -EPERM;
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -EINVAL;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index da5639a5bd3b..aa74be21f5b6 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -616,7 +616,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 	    !attr->btf_key_type_id || !attr->btf_value_type_id)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return -EPERM;
 
 	if (attr->value_size >= KMALLOC_MAX_SIZE -
diff --git a/net/core/filter.c b/net/core/filter.c
index 17bc9af8f156..fae09b4d4da4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5990,7 +5990,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		break;
 	}
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable_bpf())
 		return NULL;
 
 	switch (func_id) {
@@ -5999,7 +5999,9 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
 	case BPF_FUNC_trace_printk:
-		return bpf_get_trace_printk_proto();
+		if (capable_bpf_tracing())
+			return bpf_get_trace_printk_proto();
+		/* fall through */
 	default:
 		return NULL;
 	}
@@ -6563,7 +6565,7 @@ static bool cg_skb_is_valid_access(int off, int size,
 		return false;
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_end):
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable_bpf())
 			return false;
 		break;
 	}
@@ -6575,7 +6577,7 @@ static bool cg_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 		break;
 	case bpf_ctx_range(struct __sk_buff, tstamp):
-		if (!capable(CAP_SYS_ADMIN))
+		if (!capable_bpf())
 			return false;
 		break;
 	default:
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index d27fd929abb9..0d5567962c4e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -807,10 +807,20 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	}
 }
 
+struct libcap {
+	struct __user_cap_header_struct hdr;
+	struct __user_cap_data_struct data[2];
+};
+
 static int set_admin(bool admin)
 {
 	cap_t caps;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	/* need CAP_BPF to load progs and CAP_NET_ADMIN to run networking progs,
+	 * and CAP_TRACING to create stackmap
+	 */
+	const cap_value_t cap_net_admin = CAP_NET_ADMIN;
+	const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
+	struct libcap *cap;
 	int ret = -1;
 
 	caps = cap_get_proc();
@@ -818,11 +828,26 @@ static int set_admin(bool admin)
 		perror("cap_get_proc");
 		return -1;
 	}
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+	cap = (struct libcap *)caps;
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
+		perror("cap_set_flag clear admin");
+		goto out;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin, admin ?
 			 CAP_SET : CAP_CLEAR)) {
-		perror("cap_set_flag");
+		perror("cap_set_flag set_or_clear net");
 		goto out;
 	}
+	/* libcap is likely old and simply ignores CAP_BPF and CAP_TRACING,
+	 * so update effective bits manually
+	 */
+	if (admin) {
+		cap->data[1].effective |= 1 << (38 /* CAP_BPF */ - 32);
+		cap->data[1].effective |= 1 << (39 /* CAP_TRACING */ - 32);
+	} else {
+		cap->data[1].effective &= ~(1 << (38 - 32));
+		cap->data[1].effective &= ~(1 << (39 - 32));
+	}
 	if (cap_set_proc(caps)) {
 		perror("cap_set_proc");
 		goto out;
@@ -1051,9 +1076,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 static bool is_admin(void)
 {
+	cap_flag_value_t net_priv = CAP_CLEAR;
+	bool tracing_priv = false;
+	bool bpf_priv = false;
+	struct libcap *cap;
 	cap_t caps;
-	cap_flag_value_t sysadmin = CAP_CLEAR;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
 
 #ifdef CAP_IS_SUPPORTED
 	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
@@ -1066,11 +1093,14 @@ static bool is_admin(void)
 		perror("cap_get_proc");
 		return false;
 	}
-	if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
-		perror("cap_get_flag");
+	cap = (struct libcap *)caps;
+	bpf_priv = cap->data[1].effective & (1 << (38 /* CAP_BPF */ - 32));
+	tracing_priv = cap->data[1].effective & (1 << (39 /* CAP_TRACING */ - 32));
+	if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
+		perror("cap_get_flag NET");
 	if (cap_free(caps))
 		perror("cap_free");
-	return (sysadmin == CAP_SET);
+	return bpf_priv && tracing_priv && net_priv == CAP_SET;
 }
 
 static void get_unpriv_disabled()
-- 
2.20.0
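
A note on the raw bit manipulation in set_admin()/is_admin() above: in
the v3 capget(2)/capset(2) ABI the capability bitmap is two 32-bit
words, so capabilities 32..63 live in data[1]. CAP_BPF (38) therefore
maps to mask 1 << (38 - 32) = 0x40 and CAP_TRACING (39) to
1 << (39 - 32) = 0x80. A minimal standalone check in the same style --
the helper name is hypothetical, the technique is copied from the
selftest above:

    #include <stdbool.h>
    #include <sys/capability.h>

    struct libcap {
            struct __user_cap_header_struct hdr;
            struct __user_cap_data_struct data[2];
    };

    /* true if CAP_BPF (bit 38) is in the effective set; works even
     * when libcap itself does not know about CAP_BPF yet
     */
    static bool have_cap_bpf(void)
    {
            cap_t caps = cap_get_proc();
            struct libcap *cap;
            bool ret;

            if (!caps)
                    return false;
            cap = (struct libcap *)caps;
            ret = cap->data[1].effective & (1 << (38 /* CAP_BPF */ - 32));
            cap_free(caps);
            return ret;
    }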