All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Borkmann <daniel@iogearbox.net>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, joe@wand.net.nz, yhs@fb.com,
	andrii.nakryiko@gmail.com, kafai@fb.com,
	Daniel Borkmann <daniel@iogearbox.net>
Subject: [PATCH bpf-next v3 03/15] bpf: add syscall side map lock support
Date: Wed,  3 Apr 2019 20:22:54 +0200	[thread overview]
Message-ID: <f0ab2bf5c028817c3f686b6f6ed65d317e54ed75.1554314902.git.daniel@iogearbox.net> (raw)
In-Reply-To: <cover.1554314902.git.daniel@iogearbox.net>
In-Reply-To: <cover.1554314902.git.daniel@iogearbox.net>

This patch adds a new BPF_MAP_LOCK command which allows to lock the
map globally as read-only/immutable from syscall side. Map permission
handling has been refactored into map_get_sys_perms() and drops
FMODE_CAN_WRITE in case of locked map. Main use case is to allow
for setting up .rodata sections from the BPF ELF which are loaded
into the kernel, meaning BPF loader first allocates map, sets up
map value by copying .rodata section into it and once complete, it
calls BPF_MAP_LOCK on the map fd to prevent further modifications.
Given maps can be shared, we only grant the original creator of
the map the ability to lock it as syscall-side read-only or only
priviledged users otherwise.

Right now BPF_MAP_LOCK only takes map fd as argument while remaining
bpf_attr members are required to be zero. I didn't add write-only
locking here as counterpart since I don't have a concrete use-case
for it on my side, and I think it makes probably more sense to wait
once there is actually one. In that case bpf_attr can be extended
as usual with a flag field and/or others where flag 0 means that
we lock the map read-only hence this doesn't prevent to add further
extensions to BPF_MAP_LOCK upon need.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h      |  5 +++-
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/syscall.c     | 72 ++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 65 insertions(+), 13 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 214c0d2..15fc24f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -87,7 +87,10 @@ struct bpf_map {
 	struct btf *btf;
 	u32 pages;
 	bool unpriv_array;
-	/* 51 bytes hole */
+	/* Next two members are write-once. */
+	bool sys_immutable;
+	struct task_struct *creator;
+	/* 40 bytes hole */
 
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3735432..5cf9c74 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -105,6 +105,7 @@ enum bpf_cmd {
 	BPF_BTF_GET_FD_BY_ID,
 	BPF_TASK_FD_QUERY,
 	BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+	BPF_MAP_LOCK,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e25a1cf..18bbe69 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -336,6 +336,8 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
 {
 	struct bpf_map *map = filp->private_data;
 
+	if (READ_ONCE(map->creator))
+		cmpxchg(&map->creator, current, NULL);
 	if (map->ops->map_release)
 		map->ops->map_release(map, filp);
 
@@ -343,6 +345,18 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
+{
+	fmode_t mode = f.file->f_mode;
+
+	/* Our file permissions may have been overridden by global
+	 * map permissions facing syscall side.
+	 */
+	if (READ_ONCE(map->sys_immutable))
+		mode &= ~FMODE_CAN_WRITE;
+	return mode;
+}
+
 #ifdef CONFIG_PROC_FS
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
@@ -364,14 +378,16 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "max_entries:\t%u\n"
 		   "map_flags:\t%#x\n"
 		   "memlock:\t%llu\n"
-		   "map_id:\t%u\n",
+		   "map_id:\t%u\n"
+		   "sys_immutable:\t%u\n",
 		   map->map_type,
 		   map->key_size,
 		   map->value_size,
 		   map->max_entries,
 		   map->map_flags,
 		   map->pages * 1ULL << PAGE_SHIFT,
-		   map->id);
+		   map->id,
+		   READ_ONCE(map->sys_immutable));
 
 	if (owner_prog_type) {
 		seq_printf(m, "owner_prog_type:\t%u\n",
@@ -541,6 +557,7 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		goto free_map_nouncharge;
 
+	WRITE_ONCE(map->creator, current);
 	atomic_set(&map->refcnt, 1);
 	atomic_set(&map->usercnt, 1);
 
@@ -715,8 +732,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-
-	if (!(f.file->f_mode & FMODE_CAN_READ)) {
+	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -845,8 +861,7 @@ static int map_update_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-
-	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -957,8 +972,7 @@ static int map_delete_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-
-	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -1009,8 +1023,7 @@ static int map_get_next_key(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-
-	if (!(f.file->f_mode & FMODE_CAN_READ)) {
+	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -1077,8 +1090,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-
-	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -1120,6 +1132,39 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	return err;
 }
 
+#define BPF_MAP_LOCK_LAST_FIELD map_fd
+
+static int map_lock(const union bpf_attr *attr)
+{
+	int err = 0, ufd = attr->map_fd;
+	struct bpf_map *map;
+	struct fd f;
+
+	if (CHECK_ATTR(BPF_MAP_LOCK))
+		return -EINVAL;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	if (READ_ONCE(map->sys_immutable)) {
+		err = -EBUSY;
+		goto err_put;
+	}
+	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE) ||
+	    (!capable(CAP_SYS_ADMIN) &&
+	     READ_ONCE(map->creator) != current)) {
+		err = -EPERM;
+		goto err_put;
+	}
+
+	WRITE_ONCE(map->sys_immutable, true);
+	synchronize_rcu();
+err_put:
+	fdput(f);
+	return err;
+}
+
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name) \
 	[_id] = & _name ## _prog_ops,
@@ -2725,6 +2770,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_MAP_GET_NEXT_KEY:
 		err = map_get_next_key(&attr);
 		break;
+	case BPF_MAP_LOCK:
+		err = map_lock(&attr);
+		break;
 	case BPF_PROG_LOAD:
 		err = bpf_prog_load(&attr, uattr);
 		break;
-- 
2.9.5


  parent reply	other threads:[~2019-04-03 18:23 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-03 18:22 [PATCH bpf-next v3 00/15] BPF support for global data Daniel Borkmann
2019-04-03 18:22 ` [PATCH bpf-next v3 01/15] bpf: implement lookup-free direct value access for maps Daniel Borkmann
2019-04-03 18:22 ` [PATCH bpf-next v3 02/15] bpf: add program side {rd,wr}only support " Daniel Borkmann
2019-04-03 18:22 ` Daniel Borkmann [this message]
2019-04-03 18:22 ` [PATCH bpf-next v3 04/15] bpf: allow . char as part of the object name Daniel Borkmann
2019-04-03 18:22 ` [PATCH bpf-next v3 05/15] bpf: add specification for BTF Var and DataSec kinds Daniel Borkmann
2019-04-03 18:22 ` [PATCH bpf-next v3 06/15] bpf: kernel side support for BTF Var and DataSec Daniel Borkmann
2019-04-04 19:20   ` Martin Lau
2019-04-05  7:03     ` Martin Lau
2019-04-05  7:44       ` Daniel Borkmann
2019-04-03 18:22 ` [PATCH bpf-next v3 07/15] bpf: allow for key-less BTF in array map Daniel Borkmann
2019-04-03 18:22 ` [PATCH bpf-next v3 08/15] bpf: sync {btf,bpf}.h uapi header from tools infrastructure Daniel Borkmann
2019-04-03 18:23 ` [PATCH bpf-next v3 09/15] bpf, libbpf: refactor relocation handling Daniel Borkmann
2019-04-03 18:23 ` [PATCH bpf-next v3 10/15] bpf, libbpf: support global data/bss/rodata sections Daniel Borkmann
2019-04-03 18:23 ` [PATCH bpf-next v3 11/15] bpf, libbpf: add support for BTF Var and DataSec Daniel Borkmann
2019-04-03 18:23 ` [PATCH bpf-next v3 12/15] bpf: bpftool support for dumping data/bss/rodata sections Daniel Borkmann
2019-04-03 18:23 ` [PATCH bpf-next v3 13/15] bpf, selftest: test {rd,wr}only flags and direct value access Daniel Borkmann
2019-04-03 18:23 ` [PATCH bpf-next v3 14/15] bpf, selftest: test global data/bss/rodata sections Daniel Borkmann
2019-04-03 18:23 ` [PATCH bpf-next v3 15/15] bpf, selftest: add test cases for BTF Var and DataSec Daniel Borkmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f0ab2bf5c028817c3f686b6f6ed65d317e54ed75.1554314902.git.daniel@iogearbox.net \
    --to=daniel@iogearbox.net \
    --cc=andrii.nakryiko@gmail.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=joe@wand.net.nz \
    --cc=kafai@fb.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.