* [WIP 0/4] bpf: A bit of progress toward unprivileged use
@ 2019-08-05 21:29 Andy Lutomirski
  2019-08-05 21:29 ` [WIP 1/4] bpf: Respect persistent map and prog access modes Andy Lutomirski
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Andy Lutomirski @ 2019-08-05 21:29 UTC (permalink / raw)
  To: LKML, Alexei Starovoitov
  Cc: Song Liu, Kees Cook, Networking, bpf, Daniel Borkmann,
	Alexei Starovoitov, Kernel Team, Lorenz Bauer, Jann Horn,
	Greg KH, Linux API, LSM List, Andy Lutomirski

Other than the mknod() patch, this is not ready for prime time.  These
patches try to make progress toward making bpf() more useful without
privilege.

Andy Lutomirski (4):
  bpf: Respect persistent map and prog access modes
  bpf: Don't require mknod() permission to pin an object
  bpf: Add a way to mark functions as requiring privilege
  bpf: Allow creating all program types without privilege

 include/linux/bpf.h          | 30 +++++++++++++++-----
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/arraymap.c        |  8 +++++-
 kernel/bpf/cgroup.c          |  6 +++-
 kernel/bpf/inode.c           | 29 +++++++++++--------
 kernel/bpf/syscall.c         | 54 +++++++++++++++++++++++++-----------
 kernel/bpf/verifier.c        |  8 ++++++
 kernel/events/core.c         |  5 ++--
 kernel/trace/bpf_trace.c     |  1 +
 net/core/dev.c               |  4 ++-
 net/core/filter.c            |  8 ++++--
 net/netfilter/xt_bpf.c       |  5 ++--
 net/packet/af_packet.c       |  2 +-
 13 files changed, 115 insertions(+), 46 deletions(-)

-- 
2.21.0



* [WIP 1/4] bpf: Respect persistent map and prog access modes
  2019-08-05 21:29 [WIP 0/4] bpf: A bit of progress toward unprivileged use Andy Lutomirski
@ 2019-08-05 21:29 ` Andy Lutomirski
  2019-08-05 21:29 ` [WIP 2/4] bpf: Don't require mknod() permission to pin an object Andy Lutomirski
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Andy Lutomirski @ 2019-08-05 21:29 UTC (permalink / raw)
  To: LKML, Alexei Starovoitov
  Cc: Song Liu, Kees Cook, Networking, bpf, Daniel Borkmann,
	Alexei Starovoitov, Kernel Team, Lorenz Bauer, Jann Horn,
	Greg KH, Linux API, LSM List, Andy Lutomirski

In the interest of making bpf() more useful by unprivileged users,
this patch teaches bpf to respect access modes on map and prog
inodes.  The permissions are:

R on a map: read the map
W on a map: write the map

Referencing a map from a program should require RW.

R on a prog: Read or introspect the prog
W on a prog: Attach the prog to something

Test-running a prog both introspects the prog and runs it, so it
requires RW.
Detaching a prog merely uses the fd for identification, so neither R
nor W is needed.

This is likely incomplete, and it has some comments that should be
removed.

This patch uses WRITE instead of EXEC as the permission needed to
run (by attaching or test-running) a program.  EXEC seems nicer, but
O_MAYEXEC isn't merged, which makes using X awkward.
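
As a rough illustration of what this enables from userspace (the path
below is hypothetical and error handling is omitted): an unprivileged
tool could open a pinned prog read-only for introspection, since
BPF_OBJ_GET passes file_flags down to bpf_obj_get_user():

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int get_prog_ro(void)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/my_prog";
		/* R is enough to read/introspect; attaching would need W */
		attr.file_flags = BPF_F_RDONLY;

		return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
	}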
---
 include/linux/bpf.h    | 15 +++++++------
 kernel/bpf/arraymap.c  |  8 ++++++-
 kernel/bpf/cgroup.c    |  6 ++++-
 kernel/bpf/inode.c     | 25 ++++++++++++++-------
 kernel/bpf/syscall.c   | 51 ++++++++++++++++++++++++++++++------------
 kernel/events/core.c   |  5 +++--
 net/core/dev.c         |  4 +++-
 net/core/filter.c      |  8 ++++---
 net/netfilter/xt_bpf.c |  5 +++--
 net/packet/af_packet.c |  2 +-
 10 files changed, 89 insertions(+), 40 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 18f4cc2c6acd..2d5e1a4dff6c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -630,9 +630,9 @@ extern const struct bpf_prog_ops bpf_offload_prog_ops;
 extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
 extern const struct bpf_verifier_ops xdp_analyzer_ops;
 
-struct bpf_prog *bpf_prog_get(u32 ufd);
+struct bpf_prog *bpf_prog_get(u32 ufd, int mask);
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
-				       bool attach_drv);
+				       bool attach_drv, int mask);
 struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
 void bpf_prog_sub(struct bpf_prog *prog, int i);
 struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
@@ -662,7 +662,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 extern int sysctl_unprivileged_bpf_disabled;
 
 int bpf_map_new_fd(struct bpf_map *map, int flags);
-int bpf_prog_new_fd(struct bpf_prog *prog);
+int bpf_prog_new_fd(struct bpf_prog *prog, int flags);
 
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
@@ -733,7 +733,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
 		attr->numa_node : NUMA_NO_NODE;
 }
 
-struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type, int mask);
 int array_map_alloc_check(union bpf_attr *attr);
 
 int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
@@ -850,7 +850,7 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
 }
 
 static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
-				enum bpf_prog_type type)
+				enum bpf_prog_type type, int mask)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
@@ -878,9 +878,10 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 #endif /* CONFIG_BPF_SYSCALL */
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
-						 enum bpf_prog_type type)
+						 enum bpf_prog_type type,
+						 int mask)
 {
-	return bpf_prog_get_type_dev(ufd, type, false);
+	return bpf_prog_get_type_dev(ufd, type, false, mask);
 }
 
 bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c65ce0098a9..7e17a5d42110 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -522,6 +522,10 @@ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
 }
 
 /* only called from syscall */
+/*
+ * XXX: it's totally unclear to me what this ends up doing with the fd
+ * in general.
+ */
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags)
 {
@@ -569,7 +573,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
 				   struct file *map_file, int fd)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	struct bpf_prog *prog = bpf_prog_get(fd);
+
+	/* XXX: what, exactly, does this end up doing to the prog in question? */
+	struct bpf_prog *prog = bpf_prog_get(fd, FMODE_READ | FMODE_WRITE);
 
 	if (IS_ERR(prog))
 		return prog;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 0a00eaca6fae..1450c3bdab82 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -562,7 +562,11 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
 	if (IS_ERR(cgrp))
 		return PTR_ERR(cgrp);
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	/*
+	 * No particular access required -- this only uses the fd to identify
+	 * a program, not to do anything with the program.
+	 */
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype, 0);
 	if (IS_ERR(prog))
 		prog = NULL;
 
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index cc0d0cf114e3..cb07736b33ae 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -58,7 +58,7 @@ static void bpf_any_put(void *raw, enum bpf_type type)
 	}
 }
 
-static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
+static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type, int mask)
 {
 	void *raw;
 
@@ -66,7 +66,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
 	raw = bpf_map_get_with_uref(ufd);
 	if (IS_ERR(raw)) {
 		*type = BPF_TYPE_PROG;
-		raw = bpf_prog_get(ufd);
+		raw = bpf_prog_get(ufd, mask);
 	}
 
 	return raw;
@@ -430,7 +430,12 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
 	if (IS_ERR(pname))
 		return PTR_ERR(pname);
 
-	raw = bpf_fd_probe_obj(ufd, &type);
+	/*
+	 * Pinning an object effectively grants the caller all access, because
+	 * the caller ends up owning the inode.  So require all access.
+	 * XXX: If we adopt FMODE_EXEC for running progs, require it here too.
+	 */
+	raw = bpf_fd_probe_obj(ufd, &type, FMODE_READ | FMODE_WRITE);
 	if (IS_ERR(raw)) {
 		ret = PTR_ERR(raw);
 		goto out;
@@ -456,6 +461,10 @@ static void *bpf_obj_do_get(const struct filename *pathname,
 	if (ret)
 		return ERR_PTR(ret);
 
+	/*
+	 * XXX: O_MAYEXEC doesn't exist, which is problematic here if we
+	 * want to use FMODE_EXEC.
+	 */
 	inode = d_backing_inode(path.dentry);
 	ret = inode_permission(inode, ACC_MODE(flags));
 	if (ret)
@@ -499,7 +508,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 	}
 
 	if (type == BPF_TYPE_PROG)
-		ret = bpf_prog_new_fd(raw);
+		ret = bpf_prog_new_fd(raw, f_flags);
 	else if (type == BPF_TYPE_MAP)
 		ret = bpf_map_new_fd(raw, f_flags);
 	else
@@ -512,10 +521,10 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 	return ret;
 }
 
-static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
+static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type, int mask)
 {
 	struct bpf_prog *prog;
-	int ret = inode_permission(inode, MAY_READ);
+	int ret = inode_permission(inode, mask);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -536,14 +545,14 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
 	return bpf_prog_inc(prog);
 }
 
-struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type, int mask)
 {
 	struct bpf_prog *prog;
 	struct path path;
 	int ret = kern_path(name, LOOKUP_FOLLOW, &path);
 	if (ret)
 		return ERR_PTR(ret);
-	prog = __get_prog_inode(d_backing_inode(path.dentry), type);
+	prog = __get_prog_inode(d_backing_inode(path.dentry), type, mask);
 	if (!IS_ERR(prog))
 		touch_atime(&path);
 	path_put(&path);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5d141f16f6fa..23f8f89d2a86 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -447,6 +447,7 @@ int bpf_map_new_fd(struct bpf_map *map, int flags)
 
 int bpf_get_file_flag(int flags)
 {
+	/* XXX: What about exec? */
 	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
 		return -EINVAL;
 	if (flags & BPF_F_RDONLY)
@@ -556,6 +557,10 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		return -EINVAL;
 
+	/*
+	 * XXX: I'm a bit confused.  Why would you ever create a map and
+	 * grant *yourself* less than full permission?
+	 */
 	f_flags = bpf_get_file_flag(attr->map_flags);
 	if (f_flags < 0)
 		return f_flags;
@@ -1411,7 +1416,7 @@ const struct file_operations bpf_prog_fops = {
 	.write		= bpf_dummy_write,
 };
 
-int bpf_prog_new_fd(struct bpf_prog *prog)
+int bpf_prog_new_fd(struct bpf_prog *prog, int flags)
 {
 	int ret;
 
@@ -1420,10 +1425,10 @@ int bpf_prog_new_fd(struct bpf_prog *prog)
 		return ret;
 
 	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
-				O_RDWR | O_CLOEXEC);
+				flags | O_CLOEXEC);
 }
 
-static struct bpf_prog *____bpf_prog_get(struct fd f)
+static struct bpf_prog *____bpf_prog_get(struct fd f, int mask)
 {
 	if (!f.file)
 		return ERR_PTR(-EBADF);
@@ -1431,6 +1436,10 @@ static struct bpf_prog *____bpf_prog_get(struct fd f)
 		fdput(f);
 		return ERR_PTR(-EINVAL);
 	}
+	if ((f.file->f_mode & mask) != mask) {
+		fdput(f);
+		return ERR_PTR(-EACCES);
+	}
 
 	return f.file->private_data;
 }
@@ -1497,12 +1506,12 @@ bool bpf_prog_get_ok(struct bpf_prog *prog,
 }
 
 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
-				       bool attach_drv)
+				       bool attach_drv, int mask)
 {
 	struct fd f = fdget(ufd);
 	struct bpf_prog *prog;
 
-	prog = ____bpf_prog_get(f);
+	prog = ____bpf_prog_get(f, mask);
 	if (IS_ERR(prog))
 		return prog;
 	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
@@ -1516,15 +1525,15 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
 	return prog;
 }
 
-struct bpf_prog *bpf_prog_get(u32 ufd)
+struct bpf_prog *bpf_prog_get(u32 ufd, int mask)
 {
-	return __bpf_prog_get(ufd, NULL, false);
+	return __bpf_prog_get(ufd, NULL, false, mask);
 }
 
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
-				       bool attach_drv)
+				       bool attach_drv, int mask)
 {
-	return __bpf_prog_get(ufd, &type, attach_drv);
+	return __bpf_prog_get(ufd, &type, attach_drv, mask);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
 
@@ -1707,7 +1716,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	if (err)
 		goto free_used_maps;
 
-	err = bpf_prog_new_fd(prog);
+	err = bpf_prog_new_fd(prog, O_RDWR /* | O_MAYEXEC */);
 	if (err < 0) {
 		/* failed to allocate fd.
 		 * bpf_prog_put() is needed because the above
@@ -1808,7 +1817,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 	}
 	raw_tp->btp = btp;
 
-	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
+	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd, FMODE_WRITE);
 	if (IS_ERR(prog)) {
 		err = PTR_ERR(prog);
 		goto out_free_tp;
@@ -1929,7 +1938,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype, FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
@@ -2083,7 +2092,11 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
 	    (!attr->test.ctx_size_out && attr->test.ctx_out))
 		return -EINVAL;
 
-	prog = bpf_prog_get(attr->test.prog_fd);
+	/*
+	 * A test run is a form of query, so require RW.  Using W as a proxy for
+	 * X, since X is awkward due to a lack of O_MAYEXEC.
+	 */
+	prog = bpf_prog_get(attr->test.prog_fd, FMODE_READ | FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
@@ -2147,7 +2160,11 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	fd = bpf_prog_new_fd(prog);
+	/*
+	 * We have all permissions.  This is okay, since we also require
+	 * CAP_SYS_ADMIN to do this at all.
+	 */
+	fd = bpf_prog_new_fd(prog, O_RDWR /* | O_MAYEXEC */);
 	if (fd < 0)
 		bpf_prog_put(prog);
 
@@ -2638,6 +2655,11 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 	if (!f.file)
 		return -EBADFD;
 
+	if (!(f.file->f_mode & FMODE_READ)) {
+		err = -EACCES;
+		goto out;
+	}
+
 	if (f.file->f_op == &bpf_prog_fops)
 		err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
 					      uattr);
@@ -2649,6 +2671,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 	else
 		err = -EINVAL;
 
+out:
 	fdput(f);
 	return err;
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..f2e3973b28f2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8876,7 +8876,8 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
 	if (event->prog)
 		return -EEXIST;
 
-	prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+	/* Should maybe be FMODE_EXEC? */
+	prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT, FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
@@ -8942,7 +8943,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 		/* bpf programs can only be attached to u/kprobe or tracepoint */
 		return -EINVAL;
 
-	prog = bpf_prog_get(prog_fd);
+	prog = bpf_prog_get(prog_fd, FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index fc676b2610e3..3fcaeae693bb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8093,8 +8093,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 			return -EBUSY;
 		}
 
+		/* XXX: FMODE_EXEC? */
 		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
-					     bpf_op == ops->ndo_bpf);
+					     bpf_op == ops->ndo_bpf,
+					     FMODE_WRITE);
 		if (IS_ERR(prog))
 			return PTR_ERR(prog);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 4e2a79b2fd77..9282462678fd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1544,7 +1544,8 @@ static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
 		return ERR_PTR(-EPERM);
 
-	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+	/* FMODE_EXEC? */
+	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE);
 }
 
 int sk_attach_bpf(u32 ufd, struct sock *sk)
@@ -1572,9 +1573,10 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
 		return -EPERM;
 
-	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE);
 	if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
-		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
+		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT,
+					 FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 13cf3f9b5938..34e5c08ee1f3 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -44,7 +44,7 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
 {
 	struct bpf_prog *prog;
 
-	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
@@ -57,7 +57,8 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
 	if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
 		return -EINVAL;
 
-	*ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+	*ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER,
+				      MAY_EXEC);
 	return PTR_ERR_OR_ZERO(*ret);
 }
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8d54f3047768..5b8c5e5d94bf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1563,7 +1563,7 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
 	if (copy_from_user(&fd, data, len))
 		return -EFAULT;
 
-	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
-- 
2.21.0



* [WIP 2/4] bpf: Don't require mknod() permission to pin an object
  2019-08-05 21:29 [WIP 0/4] bpf: A bit of progress toward unprivileged use Andy Lutomirski
  2019-08-05 21:29 ` [WIP 1/4] bpf: Respect persistent map and prog access modes Andy Lutomirski
@ 2019-08-05 21:29 ` Andy Lutomirski
  2019-08-05 21:29 ` [WIP 3/4] bpf: Add a way to mark functions as requiring privilege Andy Lutomirski
  2019-08-05 21:29 ` [WIP 4/4] bpf: Allow creating all program types without privilege Andy Lutomirski
  3 siblings, 0 replies; 5+ messages in thread
From: Andy Lutomirski @ 2019-08-05 21:29 UTC (permalink / raw)
  To: LKML, Alexei Starovoitov
  Cc: Song Liu, Kees Cook, Networking, bpf, Daniel Borkmann,
	Alexei Starovoitov, Kernel Team, Lorenz Bauer, Jann Horn,
	Greg KH, Linux API, LSM List, Andy Lutomirski

security_path_mknod() seems excessive for pinning an object --
pinning an object is effectively just creating a file.  It's also
redundant, as vfs_mkobj() calls security_inode_create() by itself.

This isn't strictly required -- mknod(path, S_IFREG, unused) works
to create regular files, but bpf is currently the only user in the
kernel outside of mknod() itself that uses it to create regular
(i.e. S_IFREG) files.
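
For context, a rough sketch of what pinning looks like from userspace
(the fd, path, and surrounding declarations are made up): it really is
just asking the kernel to create one file on a bpffs mount.

	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.bpf_fd = map_fd;	/* fd from BPF_MAP_CREATE or BPF_PROG_LOAD */
	attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/my_map";
	err = syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));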

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 kernel/bpf/inode.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index cb07736b33ae..14304609003a 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -394,10 +394,6 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
 
 	mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
 
-	ret = security_path_mknod(&path, dentry, mode, 0);
-	if (ret)
-		goto out;
-
 	dir = d_inode(path.dentry);
 	if (dir->i_op != &bpf_dir_iops) {
 		ret = -EPERM;
-- 
2.21.0



* [WIP 3/4] bpf: Add a way to mark functions as requiring privilege
  2019-08-05 21:29 [WIP 0/4] bpf: A bit of progress toward unprivileged use Andy Lutomirski
  2019-08-05 21:29 ` [WIP 1/4] bpf: Respect persistent map and prog access modes Andy Lutomirski
  2019-08-05 21:29 ` [WIP 2/4] bpf: Don't require mknod() permission to pin an object Andy Lutomirski
@ 2019-08-05 21:29 ` Andy Lutomirski
  2019-08-05 21:29 ` [WIP 4/4] bpf: Allow creating all program types without privilege Andy Lutomirski
  3 siblings, 0 replies; 5+ messages in thread
From: Andy Lutomirski @ 2019-08-05 21:29 UTC (permalink / raw)
  To: LKML, Alexei Starovoitov
  Cc: Song Liu, Kees Cook, Networking, bpf, Daniel Borkmann,
	Alexei Starovoitov, Kernel Team, Lorenz Bauer, Jann Horn,
	Greg KH, Linux API, LSM List, Andy Lutomirski

This is horribly incomplete:

 - I only marked one function as requiring privilege, and there are
   surely more.

 - Checking is_priv is probably not the right thing to do.  This should
   probably do something more clever.  At the very least, it needs to
   integrate with the upcoming lockdown LSM infrastructure.

 - The seen_privileged_funcs mechanism is probably not a good solution.
   Instead we should check something while we still have enough context
   to give a good error message.  But we *don't* want to check for
   capabilities up front before even seeing a function call, since we
   don't want to inadvertently generate audit events for privileges that
   are never used.

So it's the idea that counts :)
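
To make the intent concrete, here's a sketch (not part of this patch)
of how another helper might get marked, assuming the same .privilege
field: bpf_trace_printk() writes to the shared trace buffer, so it
would plausibly carry the write-global-logs flag.

	static const struct bpf_func_proto bpf_trace_printk_proto = {
		.func		= bpf_trace_printk,
		.gpl_only	= true,
		.privilege	= BPF_FUNC_PRIV_WRITE_GLOBAL_LOGS,
		.ret_type	= RET_INTEGER,
		.arg1_type	= ARG_PTR_TO_MEM,
		.arg2_type	= ARG_CONST_SIZE,
	};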

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 include/linux/bpf.h          | 15 +++++++++++++++
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/verifier.c        |  8 ++++++++
 kernel/trace/bpf_trace.c     |  1 +
 4 files changed, 25 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2d5e1a4dff6c..de31b9888b6c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -229,6 +229,7 @@ struct bpf_func_proto {
 	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 	bool gpl_only;
 	bool pkt_access;
+	u16 privilege;
 	enum bpf_return_type ret_type;
 	enum bpf_arg_type arg1_type;
 	enum bpf_arg_type arg2_type;
@@ -237,6 +238,20 @@ struct bpf_func_proto {
 	enum bpf_arg_type arg5_type;
 };
 
+/*
+ * Some functions should require privilege to call at all, even in a test
+ * run.  These flags indicate why privilege is required.  The core BPF
+ * code will verify that the creator of such a program has the requisite
+ * privilege.
+ *
+ * NB: This means that anyone who creates a privileged program (due to
+ * such a call or due to a privilege-requiring pointer-to-integer conversion)
+ * is responsible for restricting access to the program in an appropriate
+ * manner.
+ */
+#define BPF_FUNC_PRIV_READ_KERNEL_MEMORY BIT(0)
+#define BPF_FUNC_PRIV_WRITE_GLOBAL_LOGS BIT(1)
+
 /* bpf_context is intentionally undefined structure. Pointer to bpf_context is
  * the first argument to eBPF programs.
  * For socket filters: 'struct bpf_context *' == 'struct sk_buff *'
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5fe99f322b1c..9877f5753cf4 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -363,6 +363,7 @@ struct bpf_verifier_env {
 	u32 id_gen;			/* used to generate unique reg IDs */
 	bool allow_ptr_leaks;
 	bool seen_direct_write;
+	u16 seen_privileged_funcs;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 	const struct bpf_line_info *prev_linfo;
 	struct bpf_verifier_log log;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5900cbb966b1..5e048688fd8d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4129,6 +4129,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 
 	if (changes_data)
 		clear_all_pkt_pointers(env);
+
+	env->seen_privileged_funcs |= fn->privilege;
+
 	return 0;
 }
 
@@ -9371,6 +9374,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (ret == 0)
 		adjust_btf_func(env);
 
+	if (env->seen_privileged_funcs && !is_priv) {
+		ret = -EPERM;
+		goto err_release_maps;
+	}
+
 err_release_maps:
 	if (!env->prog->aux->used_maps)
 		/* if we didn't copy map pointers into bpf_prog_info, release
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1255d14576..d9454588d9e8 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -152,6 +152,7 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 static const struct bpf_func_proto bpf_probe_read_proto = {
 	.func		= bpf_probe_read,
 	.gpl_only	= true,
+	.privilege	= BPF_FUNC_PRIV_READ_KERNEL_MEMORY,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
-- 
2.21.0



* [WIP 4/4] bpf: Allow creating all program types without privilege
  2019-08-05 21:29 [WIP 0/4] bpf: A bit of progress toward unprivileged use Andy Lutomirski
                   ` (2 preceding siblings ...)
  2019-08-05 21:29 ` [WIP 3/4] bpf: Add a way to mark functions as requiring privilege Andy Lutomirski
@ 2019-08-05 21:29 ` Andy Lutomirski
  3 siblings, 0 replies; 5+ messages in thread
From: Andy Lutomirski @ 2019-08-05 21:29 UTC (permalink / raw)
  To: LKML, Alexei Starovoitov
  Cc: Song Liu, Kees Cook, Networking, bpf, Daniel Borkmann,
	Alexei Starovoitov, Kernel Team, Lorenz Bauer, Jann Horn,
	Greg KH, Linux API, LSM List, Andy Lutomirski

This doesn't let you *run* the programs except in test mode, so it should
be safe.  Famous last words.

This assumes that the check-privilege-to-call-privileged-functions
patch actually catches all the cases and that there's nothing else
that should need privilege lurking in the type-specific verifiers.
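
As a concrete example of what this buys an unprivileged user (the fd
and buffer below are hypothetical): load, say, an XDP program and
exercise it via BPF_PROG_TEST_RUN, which patch 1 gates on R+W access
to the prog fd; actually attaching it to a device still requires the
usual privileges.

	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.test.prog_fd = prog_fd;	/* from an unprivileged BPF_PROG_LOAD */
	attr.test.data_in = (__u64)(unsigned long)pkt;
	attr.test.data_size_in = pkt_len;
	attr.test.repeat = 1;
	err = syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));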

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 kernel/bpf/syscall.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 23f8f89d2a86..730afa2be786 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1649,8 +1649,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	    attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
 		return -E2BIG;
 	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
-	    type != BPF_PROG_TYPE_CGROUP_SKB &&
-	    !capable(CAP_SYS_ADMIN))
+	    type != BPF_PROG_TYPE_CGROUP_SKB)
 		return -EPERM;
 
 	bpf_prog_load_fixup_attach_type(attr);
-- 
2.21.0


