* [PATCH bpf-next 1/2] bpf: change uapi for bpf iterator map elements
  2020-08-02  4:21 [PATCH bpf-next 0/2] bpf: change uapi for bpf iterator map elements Yonghong Song
@ 2020-08-02  4:21 ` Yonghong Song
  2020-08-03  1:25   ` Andrii Nakryiko
  2020-08-02  4:21 ` [PATCH bpf-next 2/2] libbpf: support new uapi for map element bpf iterator Yonghong Song
  1 sibling, 1 reply; 9+ messages in thread
From: Yonghong Song @ 2020-08-02  4:21 UTC (permalink / raw)
  To: bpf, netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
map elements") added bpf iterator support for
map elements. The map element bpf iterator requires
info to identify a particular map. In the above
commit, the attr->link_create.target_fd is used
to carry the map_fd, and an enum bpf_iter_link_info
is added to the uapi to specify that target_fd
actually represents a map_fd:
    enum bpf_iter_link_info {
	BPF_ITER_LINK_UNSPEC = 0,
	BPF_ITER_LINK_MAP_FD = 1,

	MAX_BPF_ITER_LINK_INFO,
    };

This is an extensible approach as we can grow
the enumerator for pid, cgroup_id, etc. and can
reuse target_fd as a union for pid, cgroup_id,
etc. But in the future, more complex
customization may be needed, e.g., for tasks,
filtering based on both cgroup_id and user_id.

This patch changes the uapi to add the fields
	__aligned_u64	iter_info;
	__u32		iter_info_len;
to link_create to carry the additional iter_info.
The iter_info is defined as
	union bpf_iter_link_info {
		struct {
			__u32   map_fd;
		} map;
	};
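
For instance, a future (purely hypothetical) task
filter could be added as another union member
without touching the existing map field; the
cgroup_id/user_id names below are only
illustrative:
	union bpf_iter_link_info {
		struct {
			__u32	map_fd;
		} map;
		struct {	/* hypothetical, not in this patch */
			__u64	cgroup_id;
			__u32	user_id;
		} task;
	};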

So future extension for additional customization
will be easier. The bpf_iter_link_info is passed
to the target callback for validation, and the
generic bpf_iter framework does not need to deal
with it any more.
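
A minimal (untested) user space sketch using the
new fields, assuming prog_fd is the fd of a loaded
bpf_iter program and map_fd is the map to iterate:
	union bpf_iter_link_info linfo;
	union bpf_attr attr;
	int link_fd;

	memset(&linfo, 0, sizeof(linfo));
	linfo.map.map_fd = map_fd;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.attach_type = BPF_TRACE_ITER;
	attr.link_create.iter_info = (__u64)(unsigned long)&linfo;
	attr.link_create.iter_info_len = sizeof(linfo);

	/* create the map element iterator link */
	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));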

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            | 10 ++++---
 include/uapi/linux/bpf.h       | 15 +++++-----
 kernel/bpf/bpf_iter.c          | 52 +++++++++++++++-------------------
 kernel/bpf/map_iter.c          | 37 ++++++++++++++++++------
 kernel/bpf/syscall.c           |  2 +-
 net/core/bpf_sk_storage.c      | 37 ++++++++++++++++++------
 tools/include/uapi/linux/bpf.h | 15 +++++-----
 7 files changed, 104 insertions(+), 64 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cef4ef0d2b4e..55f694b63164 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info {
 	struct bpf_map *map;
 };
 
-typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
-				       struct bpf_iter_aux_info *aux);
+typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
+					union bpf_iter_link_info *linfo,
+					struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
 
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
-	bpf_iter_check_target_t check_target;
+	bpf_iter_attach_target_t attach_target;
+	bpf_iter_detach_target_t detach_target;
 	u32 ctx_arg_info_size;
-	enum bpf_iter_link_info req_linfo;
 	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 	const struct bpf_iter_seq_info *seq_info;
 };
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b134e679e9db..0480f893facd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type */
 };
 
+union bpf_iter_link_info {
+	struct {
+		__u32	map_fd;
+	} map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-	BPF_ITER_LINK_UNSPEC = 0,
-	BPF_ITER_LINK_MAP_FD = 1,
-
-	MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
+		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+		__u32		iter_info_len;	/* iter_info length */
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 363b9cafc2d8..20d62020807f 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link)
 	struct bpf_iter_link *iter_link =
 		container_of(link, struct bpf_iter_link, link);
 
-	if (iter_link->aux.map)
-		bpf_map_put_with_uref(iter_link->aux.map);
+	if (iter_link->tinfo->reg_info->detach_target)
+		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
 }
 
 static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -390,15 +390,29 @@ bool bpf_link_is_iter(struct bpf_link *link)
 
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
+	union bpf_iter_link_info __user *ulinfo;
 	struct bpf_link_primer link_primer;
 	struct bpf_iter_target_info *tinfo;
-	struct bpf_iter_aux_info aux = {};
+	union bpf_iter_link_info linfo;
 	struct bpf_iter_link *link;
-	u32 prog_btf_id, target_fd;
+	u32 prog_btf_id, linfo_len;
 	bool existed = false;
-	struct bpf_map *map;
 	int err;
 
+	memset(&linfo, 0, sizeof(union bpf_iter_link_info));
+
+	ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
+	linfo_len = attr->link_create.iter_info_len;
+	if (ulinfo && linfo_len) {
+		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
+					       linfo_len);
+		if (err)
+			return err;
+		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
+		if (copy_from_user(&linfo, ulinfo, linfo_len))
+			return -EFAULT;
+	}
+
 	prog_btf_id = prog->aux->attach_btf_id;
 	mutex_lock(&targets_mutex);
 	list_for_each_entry(tinfo, &targets, list) {
@@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	if (!existed)
 		return -ENOENT;
 
-	/* Make sure user supplied flags are target expected. */
-	target_fd = attr->link_create.target_fd;
-	if (attr->link_create.flags != tinfo->reg_info->req_linfo)
-		return -EINVAL;
-	if (!attr->link_create.flags && target_fd)
-		return -EINVAL;
-
 	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
 	if (!link)
 		return -ENOMEM;
@@ -431,28 +438,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 		return err;
 	}
 
-	if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
-		map = bpf_map_get_with_uref(target_fd);
-		if (IS_ERR(map)) {
-			err = PTR_ERR(map);
-			goto cleanup_link;
-		}
-
-		aux.map = map;
-		err = tinfo->reg_info->check_target(prog, &aux);
+	if (tinfo->reg_info->attach_target) {
+		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
 		if (err) {
-			bpf_map_put_with_uref(map);
-			goto cleanup_link;
+			bpf_link_cleanup(&link_primer);
+			return err;
 		}
-
-		link->aux.map = map;
 	}
 
 	return bpf_link_settle(&link_primer);
-
-cleanup_link:
-	bpf_link_cleanup(&link_primer);
-	return err;
 }
 
 static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index fbe1f557cb88..131603fe1cbf 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = {
 	.seq_info		= &bpf_map_seq_info,
 };
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-			      struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+			       union bpf_iter_link_info *linfo,
+			       struct bpf_iter_aux_info *aux)
 {
 	u32 key_acc_size, value_acc_size, key_size, value_size;
-	struct bpf_map *map = aux->map;
+	struct bpf_map *map;
 	bool is_percpu = false;
+	int err = -EINVAL;
+
+	if (!linfo->map.map_fd)
+		return -EINVAL;
+
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
 	else if (map->map_type != BPF_MAP_TYPE_HASH &&
 		 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
 		 map->map_type != BPF_MAP_TYPE_ARRAY)
-		return -EINVAL;
+		goto put_map;
 
 	key_acc_size = prog->aux->max_rdonly_access;
 	value_acc_size = prog->aux->max_rdwr_access;
@@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
 	else
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 
-	if (key_acc_size > key_size || value_acc_size > value_size)
-		return -EACCES;
+	if (key_acc_size > key_size || value_acc_size > value_size) {
+		err = -EACCES;
+		goto put_map;
+	}
 
+	aux->map = map;
 	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
 }
 
 DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
@@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
 
 static const struct bpf_iter_reg bpf_map_elem_reg_info = {
 	.target			= "bpf_map_elem",
-	.check_target		= bpf_iter_check_map,
-	.req_linfo		= BPF_ITER_LINK_MAP_FD,
+	.attach_target		= bpf_iter_attach_map,
+	.detach_target		= bpf_iter_detach_map,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_map_elem, key),
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2f343ce15747..86299a292214 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
 	return -EINVAL;
 }
 
-#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
+#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
 static int link_create(union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d3377c90a291..6c0bdb5a0b26 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
 	return 0;
 }
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-			      struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+			       union bpf_iter_link_info *linfo,
+			       struct bpf_iter_aux_info *aux)
 {
-	struct bpf_map *map = aux->map;
+	struct bpf_map *map;
+	int err = -EINVAL;
 
-	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+	if (!linfo->map.map_fd)
 		return -EINVAL;
 
-	if (prog->aux->max_rdonly_access > map->value_size)
-		return -EACCES;
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+		goto put_map;
+
+	if (prog->aux->max_rdonly_access > map->value_size) {
+		err = -EACCES;
+		goto put_map;
+	}
 
+	aux->map = map;
 	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
 }
 
 static const struct seq_operations bpf_sk_storage_map_seq_ops = {
@@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 
 static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
 	.target			= "bpf_sk_storage_map",
-	.check_target		= bpf_iter_check_map,
-	.req_linfo		= BPF_ITER_LINK_MAP_FD,
+	.attach_target		= bpf_iter_attach_map,
+	.detach_target		= bpf_iter_detach_map,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b134e679e9db..0480f893facd 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type */
 };
 
+union bpf_iter_link_info {
+	struct {
+		__u32	map_fd;
+	} map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-	BPF_ITER_LINK_UNSPEC = 0,
-	BPF_ITER_LINK_MAP_FD = 1,
-
-	MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
+		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+		__u32		iter_info_len;	/* iter_info length */
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
-- 
2.24.1


* [PATCH bpf-next 0/2] bpf: change uapi for bpf iterator map elements
@ 2020-08-02  4:21 Yonghong Song
  2020-08-02  4:21 ` [PATCH bpf-next 1/2] " Yonghong Song
  2020-08-02  4:21 ` [PATCH bpf-next 2/2] libbpf: support new uapi for map element bpf iterator Yonghong Song
  0 siblings, 2 replies; 9+ messages in thread
From: Yonghong Song @ 2020-08-02  4:21 UTC (permalink / raw)
  To: bpf, netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

Andrii raised a concern that the current uapi for the bpf iterator map
element is a little restrictive and not suitable for potential future
complex customization. This is a valid suggestion, considering people
may indeed add more complex customization to the iterator, e.g.,
cgroup_id + user_id, etc. for task or task_file. Another example might
be map_id plus additional control so that the bpf iterator may bail
out of a bucket earlier if the bucket has too many elements, which may
hold the bucket lock too long and impact other parts of the system.

Patch #1 modifies the uapi along with the corresponding kernel
changes. Patch #2 adjusts the libbpf API accordingly.

Yonghong Song (2):
  bpf: change uapi for bpf iterator map elements
  libbpf: support new uapi for map element bpf iterator

 include/linux/bpf.h            | 10 ++++---
 include/uapi/linux/bpf.h       | 15 +++++-----
 kernel/bpf/bpf_iter.c          | 52 +++++++++++++++-------------------
 kernel/bpf/map_iter.c          | 37 ++++++++++++++++++------
 kernel/bpf/syscall.c           |  2 +-
 net/core/bpf_sk_storage.c      | 37 ++++++++++++++++++------
 tools/include/uapi/linux/bpf.h | 15 +++++-----
 tools/lib/bpf/bpf.c            |  4 ++-
 tools/lib/bpf/bpf.h            |  5 ++--
 tools/lib/bpf/libbpf.c         |  7 +++--
 10 files changed, 115 insertions(+), 69 deletions(-)

-- 
2.24.1


* [PATCH bpf-next 2/2] libbpf: support new uapi for map element bpf iterator
  2020-08-02  4:21 [PATCH bpf-next 0/2] bpf: change uapi for bpf iterator map elements Yonghong Song
  2020-08-02  4:21 ` [PATCH bpf-next 1/2] " Yonghong Song
@ 2020-08-02  4:21 ` Yonghong Song
  2020-08-03  1:35   ` Andrii Nakryiko
  1 sibling, 1 reply; 9+ messages in thread
From: Yonghong Song @ 2020-08-02  4:21 UTC (permalink / raw)
  To: bpf, netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

The previous commit adjusted the kernel uapi for the
map element bpf iterator. This patch adjusts the
libbpf API accordingly.
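
A minimal (untested) usage sketch with the adjusted
libbpf API, assuming prog is a loaded bpf_iter
program and map_fd is the map to iterate:
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts,
			    .map_fd = map_fd);
	struct bpf_link *link;

	link = bpf_program__attach_iter(prog, &opts);
	if (libbpf_get_error(link))
		return -1; /* attach failed */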

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/lib/bpf/bpf.c    | 4 +++-
 tools/lib/bpf/bpf.h    | 5 +++--
 tools/lib/bpf/libbpf.c | 7 +++++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index eab14c97c15d..c75a84398d51 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -598,7 +598,9 @@ int bpf_link_create(int prog_fd, int target_fd,
 	attr.link_create.prog_fd = prog_fd;
 	attr.link_create.target_fd = target_fd;
 	attr.link_create.attach_type = attach_type;
-	attr.link_create.flags = OPTS_GET(opts, flags, 0);
+	attr.link_create.iter_info =
+		ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+	attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
 
 	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
 }
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 28855fd5b5f4..c9895f191305 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -170,9 +170,10 @@ LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 
 struct bpf_link_create_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
-	__u32 flags;
+	union bpf_iter_link_info *iter_info;
+	__u32 iter_info_len;
 };
-#define bpf_link_create_opts__last_field flags
+#define bpf_link_create_opts__last_field iter_info_len
 
 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
 			       enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7be04e45d29c..dc8fabf9d30d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8298,6 +8298,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
 			 const struct bpf_iter_attach_opts *opts)
 {
 	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
+	union bpf_iter_link_info linfo;
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
 	int prog_fd, link_fd;
@@ -8307,8 +8308,10 @@ bpf_program__attach_iter(struct bpf_program *prog,
 		return ERR_PTR(-EINVAL);
 
 	if (OPTS_HAS(opts, map_fd)) {
-		target_fd = opts->map_fd;
-		link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
+		memset(&linfo, 0, sizeof(linfo));
+		linfo.map.map_fd = opts->map_fd;
+		link_create_opts.iter_info = &linfo;
+		link_create_opts.iter_info_len = sizeof(linfo);
 	}
 
 	prog_fd = bpf_program__fd(prog);
-- 
2.24.1


* Re: [PATCH bpf-next 1/2] bpf: change uapi for bpf iterator map elements
  2020-08-02  4:21 ` [PATCH bpf-next 1/2] " Yonghong Song
@ 2020-08-03  1:25   ` Andrii Nakryiko
  2020-08-03  2:23     ` Yonghong Song
  0 siblings, 1 reply; 9+ messages in thread
From: Andrii Nakryiko @ 2020-08-03  1:25 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann, Kernel Team

On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
>
> Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
> map elements") added bpf iterator support for
> map elements. The map element bpf iterator requires
> info to identify a particular map. In the above
> commit, the attr->link_create.target_fd is used
> to carry map_fd and an enum bpf_iter_link_info
> is added to uapi to specify the target_fd actually
> representing a map_fd:
>     enum bpf_iter_link_info {
>         BPF_ITER_LINK_UNSPEC = 0,
>         BPF_ITER_LINK_MAP_FD = 1,
>
>         MAX_BPF_ITER_LINK_INFO,
>     };
>
> This is an extensible approach as we can grow
> enumerator for pid, cgroup_id, etc. and we can
> unionize target_fd for pid, cgroup_id, etc.
> But in the future, there are chances that
> more complex customization may happen, e.g.,
> for tasks, it could be filtered based on
> both cgroup_id and user_id.
>
> This patch changed the uapi to have fields
>         __aligned_u64   iter_info;
>         __u32           iter_info_len;
> for additional iter_info for link_create.
> The iter_info is defined as
>         union bpf_iter_link_info {
>                 struct {
>                         __u32   map_fd;
>                 } map;
>         };
>
> So future extension for additional customization
> will be easier. The bpf_iter_link_info will be
> passed to target callback to validate and generic
> bpf_iter framework does not need to deal it any
> more.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h            | 10 ++++---
>  include/uapi/linux/bpf.h       | 15 +++++-----
>  kernel/bpf/bpf_iter.c          | 52 +++++++++++++++-------------------
>  kernel/bpf/map_iter.c          | 37 ++++++++++++++++++------
>  kernel/bpf/syscall.c           |  2 +-
>  net/core/bpf_sk_storage.c      | 37 ++++++++++++++++++------
>  tools/include/uapi/linux/bpf.h | 15 +++++-----
>  7 files changed, 104 insertions(+), 64 deletions(-)
>

[...]

>  int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>  {
> +       union bpf_iter_link_info __user *ulinfo;
>         struct bpf_link_primer link_primer;
>         struct bpf_iter_target_info *tinfo;
> -       struct bpf_iter_aux_info aux = {};
> +       union bpf_iter_link_info linfo;
>         struct bpf_iter_link *link;
> -       u32 prog_btf_id, target_fd;
> +       u32 prog_btf_id, linfo_len;
>         bool existed = false;
> -       struct bpf_map *map;
>         int err;
>
> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
> +
> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
> +       linfo_len = attr->link_create.iter_info_len;
> +       if (ulinfo && linfo_len) {

We probably want to be more strict here: if either pointer or len is
non-zero, both should be present and valid. Otherwise we can have
garbage in iter_info, as long as iter_info_len is zero.
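
Something along these lines (completely untested, reusing the
ulinfo/linfo_len handling from this patch) would reject a dangling
pointer or length:

	if (!!ulinfo != !!linfo_len)
		return -EINVAL;
	if (ulinfo) {
		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
					       linfo_len);
		if (err)
			return err;
		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
		if (copy_from_user(&linfo, ulinfo, linfo_len))
			return -EFAULT;
	}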

> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
> +                                              linfo_len);
> +               if (err)
> +                       return err;
> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
> +                       return -EFAULT;
> +       }
> +
>         prog_btf_id = prog->aux->attach_btf_id;
>         mutex_lock(&targets_mutex);
>         list_for_each_entry(tinfo, &targets, list) {
> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>         if (!existed)
>                 return -ENOENT;
>
> -       /* Make sure user supplied flags are target expected. */
> -       target_fd = attr->link_create.target_fd;
> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
> -               return -EINVAL;
> -       if (!attr->link_create.flags && target_fd)
> -               return -EINVAL;
> -

Please still ensure that no flags are specified.


>         link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
>         if (!link)
>                 return -ENOMEM;
> @@ -431,28 +438,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>                 return err;
>         }
>

[...]

> -static int bpf_iter_check_map(struct bpf_prog *prog,
> -                             struct bpf_iter_aux_info *aux)
> +static int bpf_iter_attach_map(struct bpf_prog *prog,
> +                              union bpf_iter_link_info *linfo,
> +                              struct bpf_iter_aux_info *aux)
>  {
> -       struct bpf_map *map = aux->map;
> +       struct bpf_map *map;
> +       int err = -EINVAL;
>
> -       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> +       if (!linfo->map.map_fd)
>                 return -EINVAL;

This could be -EBADF?

>
> -       if (prog->aux->max_rdonly_access > map->value_size)
> -               return -EACCES;
> +       map = bpf_map_get_with_uref(linfo->map.map_fd);
> +       if (IS_ERR(map))
> +               return PTR_ERR(map);
> +
> +       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> +               goto put_map;
> +
> +       if (prog->aux->max_rdonly_access > map->value_size) {
> +               err = -EACCES;
> +               goto put_map;
> +       }

[...]

* Re: [PATCH bpf-next 2/2] libbpf: support new uapi for map element bpf iterator
  2020-08-02  4:21 ` [PATCH bpf-next 2/2] libbpf: support new uapi for map element bpf iterator Yonghong Song
@ 2020-08-03  1:35   ` Andrii Nakryiko
  2020-08-03  2:30     ` Yonghong Song
  0 siblings, 1 reply; 9+ messages in thread
From: Andrii Nakryiko @ 2020-08-03  1:35 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann, Kernel Team

On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
>
> Previous commit adjusted kernel uapi for map
> element bpf iterator. This patch adjusted libbpf API
> due to uapi change.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  tools/lib/bpf/bpf.c    | 4 +++-
>  tools/lib/bpf/bpf.h    | 5 +++--
>  tools/lib/bpf/libbpf.c | 7 +++++--
>  3 files changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index eab14c97c15d..c75a84398d51 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -598,7 +598,9 @@ int bpf_link_create(int prog_fd, int target_fd,
>         attr.link_create.prog_fd = prog_fd;
>         attr.link_create.target_fd = target_fd;
>         attr.link_create.attach_type = attach_type;
> -       attr.link_create.flags = OPTS_GET(opts, flags, 0);
> +       attr.link_create.iter_info =
> +               ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
> +       attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
>
>         return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
>  }
> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> index 28855fd5b5f4..c9895f191305 100644
> --- a/tools/lib/bpf/bpf.h
> +++ b/tools/lib/bpf/bpf.h
> @@ -170,9 +170,10 @@ LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
>
>  struct bpf_link_create_opts {
>         size_t sz; /* size of this struct for forward/backward compatibility */
> -       __u32 flags;

I'd actually keep flags in link_create_opts, as it's part of the kernel
UAPI anyways, we won't have to add it later. Just pass it through into
bpf_attr.
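
I.e., keep something like this (sketch) in bpf_link_create():

	attr.link_create.flags = OPTS_GET(opts, flags, 0);

alongside the new iter_info assignments.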

> +       union bpf_iter_link_info *iter_info;
> +       __u32 iter_info_len;
>  };
> -#define bpf_link_create_opts__last_field flags
> +#define bpf_link_create_opts__last_field iter_info_len
>
>  LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
>                                enum bpf_attach_type attach_type,
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 7be04e45d29c..dc8fabf9d30d 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -8298,6 +8298,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
>                          const struct bpf_iter_attach_opts *opts)
>  {
>         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
> +       union bpf_iter_link_info linfo;
>         char errmsg[STRERR_BUFSIZE];
>         struct bpf_link *link;
>         int prog_fd, link_fd;
> @@ -8307,8 +8308,10 @@ bpf_program__attach_iter(struct bpf_program *prog,
>                 return ERR_PTR(-EINVAL);
>
>         if (OPTS_HAS(opts, map_fd)) {
> -               target_fd = opts->map_fd;
> -               link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
> +               memset(&linfo, 0, sizeof(linfo));
> +               linfo.map.map_fd = opts->map_fd;
> +               link_create_opts.iter_info = &linfo;
> +               link_create_opts.iter_info_len = sizeof(linfo);

Maybe instead of having map_fd directly in bpf_iter_attach_opts, let's
just accept bpf_iter_link_info and its len directly from the user?
Right now kernel UAPI and libbpf API for customizing iterator
attachment differ. It would be simpler to keep them in sync and we
won't have to discuss how to evolve bpf_iter_attach_opts as we add
more customization for different types of iterators. Thoughts?
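
E.g., something like this (just a sketch, field names are only
illustrative):

	struct bpf_iter_attach_opts {
		size_t sz; /* size of this struct for forward/backward compatibility */
		union bpf_iter_link_info *link_info;
		__u32 link_info_len;
	};

Then bpf_program__attach_iter() can pass link_info and link_info_len
straight through to bpf_link_create_opts without interpreting them.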

>         }
>
>         prog_fd = bpf_program__fd(prog);
> --
> 2.24.1
>

* Re: [PATCH bpf-next 1/2] bpf: change uapi for bpf iterator map elements
  2020-08-03  1:25   ` Andrii Nakryiko
@ 2020-08-03  2:23     ` Yonghong Song
  2020-08-03  5:11       ` Andrii Nakryiko
  0 siblings, 1 reply; 9+ messages in thread
From: Yonghong Song @ 2020-08-03  2:23 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann, Kernel Team



On 8/2/20 6:25 PM, Andrii Nakryiko wrote:
> On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
>> map elements") added bpf iterator support for
>> map elements. The map element bpf iterator requires
>> info to identify a particular map. In the above
>> commit, the attr->link_create.target_fd is used
>> to carry map_fd and an enum bpf_iter_link_info
>> is added to uapi to specify the target_fd actually
>> representing a map_fd:
>>      enum bpf_iter_link_info {
>>          BPF_ITER_LINK_UNSPEC = 0,
>>          BPF_ITER_LINK_MAP_FD = 1,
>>
>>          MAX_BPF_ITER_LINK_INFO,
>>      };
>>
>> This is an extensible approach as we can grow
>> enumerator for pid, cgroup_id, etc. and we can
>> unionize target_fd for pid, cgroup_id, etc.
>> But in the future, there are chances that
>> more complex customization may happen, e.g.,
>> for tasks, it could be filtered based on
>> both cgroup_id and user_id.
>>
>> This patch changed the uapi to have fields
>>          __aligned_u64   iter_info;
>>          __u32           iter_info_len;
>> for additional iter_info for link_create.
>> The iter_info is defined as
>>          union bpf_iter_link_info {
>>                  struct {
>>                          __u32   map_fd;
>>                  } map;
>>          };
>>
>> So future extension for additional customization
>> will be easier. The bpf_iter_link_info will be
>> passed to target callback to validate and generic
>> bpf_iter framework does not need to deal it any
>> more.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   include/linux/bpf.h            | 10 ++++---
>>   include/uapi/linux/bpf.h       | 15 +++++-----
>>   kernel/bpf/bpf_iter.c          | 52 +++++++++++++++-------------------
>>   kernel/bpf/map_iter.c          | 37 ++++++++++++++++++------
>>   kernel/bpf/syscall.c           |  2 +-
>>   net/core/bpf_sk_storage.c      | 37 ++++++++++++++++++------
>>   tools/include/uapi/linux/bpf.h | 15 +++++-----
>>   7 files changed, 104 insertions(+), 64 deletions(-)
>>
> 
> [...]
> 
>>   int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>   {
>> +       union bpf_iter_link_info __user *ulinfo;
>>          struct bpf_link_primer link_primer;
>>          struct bpf_iter_target_info *tinfo;
>> -       struct bpf_iter_aux_info aux = {};
>> +       union bpf_iter_link_info linfo;
>>          struct bpf_iter_link *link;
>> -       u32 prog_btf_id, target_fd;
>> +       u32 prog_btf_id, linfo_len;
>>          bool existed = false;
>> -       struct bpf_map *map;
>>          int err;
>>
>> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
>> +
>> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
>> +       linfo_len = attr->link_create.iter_info_len;
>> +       if (ulinfo && linfo_len) {
> 
> We probably want to be more strict here: if either pointer or len is
> non-zero, both should be present and valid. Otherwise we can have
> garbage in iter_info, as long as iter_info_len is zero.

yes, it is possible that iter_info_len = 0 and iter_info is not null,
and if this happens, iter_info will not be examined.

in the kernel, we have places where this is handled similarly. For
example, the cgroup bpf_prog query.

kernel/bpf/cgroup.c, function __cgroup_bpf_query

   __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
   ...
   if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
     return 0;

In the above case, it is possible that prog_cnt = 0 and prog_ids != NULL,
or prog_ids == NULL and prog_cnt != 0, and we won't return an error
to user space.

Not 100% sure whether we have a convention here or not.

> 
>> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
>> +                                              linfo_len);
>> +               if (err)
>> +                       return err;
>> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
>> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
>> +                       return -EFAULT;
>> +       }
>> +
>>          prog_btf_id = prog->aux->attach_btf_id;
>>          mutex_lock(&targets_mutex);
>>          list_for_each_entry(tinfo, &targets, list) {
>> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>          if (!existed)
>>                  return -ENOENT;
>>
>> -       /* Make sure user supplied flags are target expected. */
>> -       target_fd = attr->link_create.target_fd;
>> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
>> -               return -EINVAL;
>> -       if (!attr->link_create.flags && target_fd)
>> -               return -EINVAL;
>> -
> 
> Please still ensure that no flags are specified.

Makes sense. I also need to ensure target_fd is 0 since it is not used
any more.

> 
> 
>>          link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
>>          if (!link)
>>                  return -ENOMEM;
>> @@ -431,28 +438,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>                  return err;
>>          }
>>
> 
> [...]
> 
>> -static int bpf_iter_check_map(struct bpf_prog *prog,
>> -                             struct bpf_iter_aux_info *aux)
>> +static int bpf_iter_attach_map(struct bpf_prog *prog,
>> +                              union bpf_iter_link_info *linfo,
>> +                              struct bpf_iter_aux_info *aux)
>>   {
>> -       struct bpf_map *map = aux->map;
>> +       struct bpf_map *map;
>> +       int err = -EINVAL;
>>
>> -       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
>> +       if (!linfo->map.map_fd)
>>                  return -EINVAL;
> 
> This could be -EBADF?

Good suggestion. Will do.

> 
>>
>> -       if (prog->aux->max_rdonly_access > map->value_size)
>> -               return -EACCES;
>> +       map = bpf_map_get_with_uref(linfo->map.map_fd);
>> +       if (IS_ERR(map))
>> +               return PTR_ERR(map);
>> +
>> +       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
>> +               goto put_map;
>> +
>> +       if (prog->aux->max_rdonly_access > map->value_size) {
>> +               err = -EACCES;
>> +               goto put_map;
>> +       }
> 
> [...]
> 

* Re: [PATCH bpf-next 2/2] libbpf: support new uapi for map element bpf iterator
  2020-08-03  1:35   ` Andrii Nakryiko
@ 2020-08-03  2:30     ` Yonghong Song
  0 siblings, 0 replies; 9+ messages in thread
From: Yonghong Song @ 2020-08-03  2:30 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann, Kernel Team



On 8/2/20 6:35 PM, Andrii Nakryiko wrote:
> On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> Previous commit adjusted kernel uapi for map
>> element bpf iterator. This patch adjusted libbpf API
>> due to uapi change.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   tools/lib/bpf/bpf.c    | 4 +++-
>>   tools/lib/bpf/bpf.h    | 5 +++--
>>   tools/lib/bpf/libbpf.c | 7 +++++--
>>   3 files changed, 11 insertions(+), 5 deletions(-)
>>
>> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
>> index eab14c97c15d..c75a84398d51 100644
>> --- a/tools/lib/bpf/bpf.c
>> +++ b/tools/lib/bpf/bpf.c
>> @@ -598,7 +598,9 @@ int bpf_link_create(int prog_fd, int target_fd,
>>          attr.link_create.prog_fd = prog_fd;
>>          attr.link_create.target_fd = target_fd;
>>          attr.link_create.attach_type = attach_type;
>> -       attr.link_create.flags = OPTS_GET(opts, flags, 0);
>> +       attr.link_create.iter_info =
>> +               ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
>> +       attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
>>
>>          return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
>>   }
>> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
>> index 28855fd5b5f4..c9895f191305 100644
>> --- a/tools/lib/bpf/bpf.h
>> +++ b/tools/lib/bpf/bpf.h
>> @@ -170,9 +170,10 @@ LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
>>
>>   struct bpf_link_create_opts {
>>          size_t sz; /* size of this struct for forward/backward compatibility */
>> -       __u32 flags;
> 
> I'd actually keep flags in link_create_ops, as it's part of the kernel
> UAPI anyways, we won't have to add it later. Just pass it through into
> bpf_attr.

Okay. Will keep it.

> 
>> +       union bpf_iter_link_info *iter_info;
>> +       __u32 iter_info_len;
>>   };
>> -#define bpf_link_create_opts__last_field flags
>> +#define bpf_link_create_opts__last_field iter_info_len
>>
>>   LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
>>                                 enum bpf_attach_type attach_type,
>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>> index 7be04e45d29c..dc8fabf9d30d 100644
>> --- a/tools/lib/bpf/libbpf.c
>> +++ b/tools/lib/bpf/libbpf.c
>> @@ -8298,6 +8298,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
>>                           const struct bpf_iter_attach_opts *opts)
>>   {
>>          DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
>> +       union bpf_iter_link_info linfo;
>>          char errmsg[STRERR_BUFSIZE];
>>          struct bpf_link *link;
>>          int prog_fd, link_fd;
>> @@ -8307,8 +8308,10 @@ bpf_program__attach_iter(struct bpf_program *prog,
>>                  return ERR_PTR(-EINVAL);
>>
>>          if (OPTS_HAS(opts, map_fd)) {
>> -               target_fd = opts->map_fd;
>> -               link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
>> +               memset(&linfo, 0, sizeof(linfo));
>> +               linfo.map.map_fd = opts->map_fd;
>> +               link_create_opts.iter_info = &linfo;
>> +               link_create_opts.iter_info_len = sizeof(linfo);
> 
> Maybe instead of having map_fd directly in bpf_iter_attach_opts, let's
> just accept bpf_iter_link_info and its len directly from the user?
> Right now kernel UAPI and libbpf API for customizing iterator
> attachment differ. It would be simpler to keep them in sync and we
> won't have to discuss how to evolve bpf_iter_attach_opts as we add
> more customization for different types of iterators. Thoughts?

Good suggestion. Previously we didn't have a structure to encapsulate
map_fd, so map_fd was added to bpf_iter_attach_opts directly. Indeed,
we can add bpf_iter_link_info to bpf_iter_attach_opts, and this will
free libbpf from handling any future additions to bpf_iter_link_info.

> 
>>          }
>>
>>          prog_fd = bpf_program__fd(prog);
>> --
>> 2.24.1
>>

* Re: [PATCH bpf-next 1/2] bpf: change uapi for bpf iterator map elements
  2020-08-03  2:23     ` Yonghong Song
@ 2020-08-03  5:11       ` Andrii Nakryiko
  2020-08-03  6:21         ` Yonghong Song
  0 siblings, 1 reply; 9+ messages in thread
From: Andrii Nakryiko @ 2020-08-03  5:11 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann, Kernel Team

On Sun, Aug 2, 2020 at 7:23 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 8/2/20 6:25 PM, Andrii Nakryiko wrote:
> > On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
> >> map elements") added bpf iterator support for
> >> map elements. The map element bpf iterator requires
> >> info to identify a particular map. In the above
> >> commit, the attr->link_create.target_fd is used
> >> to carry map_fd and an enum bpf_iter_link_info
> >> is added to uapi to specify the target_fd actually
> >> representing a map_fd:
> >>      enum bpf_iter_link_info {
> >>          BPF_ITER_LINK_UNSPEC = 0,
> >>          BPF_ITER_LINK_MAP_FD = 1,
> >>
> >>          MAX_BPF_ITER_LINK_INFO,
> >>      };
> >>
> >> This is an extensible approach as we can grow
> >> enumerator for pid, cgroup_id, etc. and we can
> >> unionize target_fd for pid, cgroup_id, etc.
> >> But in the future, there are chances that
> >> more complex customization may happen, e.g.,
> >> for tasks, it could be filtered based on
> >> both cgroup_id and user_id.
> >>
> >> This patch changed the uapi to have fields
> >>          __aligned_u64   iter_info;
> >>          __u32           iter_info_len;
> >> for additional iter_info for link_create.
> >> The iter_info is defined as
> >>          union bpf_iter_link_info {
> >>                  struct {
> >>                          __u32   map_fd;
> >>                  } map;
> >>          };
> >>
> >> So future extension for additional customization
> >> will be easier. The bpf_iter_link_info will be
> >> passed to target callback to validate and generic
> >> bpf_iter framework does not need to deal it any
> >> more.
> >>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >>   include/linux/bpf.h            | 10 ++++---
> >>   include/uapi/linux/bpf.h       | 15 +++++-----
> >>   kernel/bpf/bpf_iter.c          | 52 +++++++++++++++-------------------
> >>   kernel/bpf/map_iter.c          | 37 ++++++++++++++++++------
> >>   kernel/bpf/syscall.c           |  2 +-
> >>   net/core/bpf_sk_storage.c      | 37 ++++++++++++++++++------
> >>   tools/include/uapi/linux/bpf.h | 15 +++++-----
> >>   7 files changed, 104 insertions(+), 64 deletions(-)
> >>
> >
> > [...]
> >
> >>   int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>   {
> >> +       union bpf_iter_link_info __user *ulinfo;
> >>          struct bpf_link_primer link_primer;
> >>          struct bpf_iter_target_info *tinfo;
> >> -       struct bpf_iter_aux_info aux = {};
> >> +       union bpf_iter_link_info linfo;
> >>          struct bpf_iter_link *link;
> >> -       u32 prog_btf_id, target_fd;
> >> +       u32 prog_btf_id, linfo_len;
> >>          bool existed = false;
> >> -       struct bpf_map *map;
> >>          int err;
> >>
> >> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
> >> +
> >> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
> >> +       linfo_len = attr->link_create.iter_info_len;
> >> +       if (ulinfo && linfo_len) {
> >
> > We probably want to be more strict here: if either pointer or len is
> > non-zero, both should be present and valid. Otherwise we can have
> > garbage in iter_info, as long as iter_info_len is zero.
>
> yes, it is possible iter_info_len = 0 and iter_info is not null and
> if this happens, iter_info will not be examined.
>
> in kernel, we have places this is handled similarly. For example,
> for cgroup bpf_prog query.
>
> kernel/bpf/cgroup.c, function __cgroup_bpf_query
>
>    __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
>    ...
>    if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
>      return 0;
>
> In the above case, it is possible prog_cnt = 0 and prog_ids != NULL,
> or prog_ids == NULL and prog_cnt != 0, and we won't return error
> to user space.
>
> Not 100% sure whether we have convention here or not.

I don't know either, but I'd assume that we didn't think about 100%
strictness when originally implementing this. So I'd go with a very
strict check for this new functionality.

>
> >
> >> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
> >> +                                              linfo_len);
> >> +               if (err)
> >> +                       return err;
> >> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
> >> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
> >> +                       return -EFAULT;
> >> +       }
> >> +
> >>          prog_btf_id = prog->aux->attach_btf_id;
> >>          mutex_lock(&targets_mutex);
> >>          list_for_each_entry(tinfo, &targets, list) {
> >> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>          if (!existed)
> >>                  return -ENOENT;
> >>
> >> -       /* Make sure user supplied flags are target expected. */
> >> -       target_fd = attr->link_create.target_fd;
> >> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
> >> -               return -EINVAL;
> >> -       if (!attr->link_create.flags && target_fd)
> >> -               return -EINVAL;
> >> -
> >
> > Please still ensure that no flags are specified.
>
> Make sense. I also need to ensure target_fd is 0 since it is not used
> any more.
>

yep, good catch

> >
> >
> >>          link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
> >>          if (!link)
> >>                  return -ENOMEM;
> >> @@ -431,28 +438,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>                  return err;
> >>          }
> >>
> >
> > [...]
> >
> >> -static int bpf_iter_check_map(struct bpf_prog *prog,
> >> -                             struct bpf_iter_aux_info *aux)
> >> +static int bpf_iter_attach_map(struct bpf_prog *prog,
> >> +                              union bpf_iter_link_info *linfo,
> >> +                              struct bpf_iter_aux_info *aux)
> >>   {
> >> -       struct bpf_map *map = aux->map;
> >> +       struct bpf_map *map;
> >> +       int err = -EINVAL;
> >>
> >> -       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> >> +       if (!linfo->map.map_fd)
> >>                  return -EINVAL;
> >
> > This could be -EBADF?
>
> Good suggestion. Will do.
>
> >
> >>
> >> -       if (prog->aux->max_rdonly_access > map->value_size)
> >> -               return -EACCES;
> >> +       map = bpf_map_get_with_uref(linfo->map.map_fd);
> >> +       if (IS_ERR(map))
> >> +               return PTR_ERR(map);
> >> +
> >> +       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> >> +               goto put_map;
> >> +
> >> +       if (prog->aux->max_rdonly_access > map->value_size) {
> >> +               err = -EACCES;
> >> +               goto put_map;
> >> +       }
> >
> > [...]
> >

* Re: [PATCH bpf-next 1/2] bpf: change uapi for bpf iterator map elements
  2020-08-03  5:11       ` Andrii Nakryiko
@ 2020-08-03  6:21         ` Yonghong Song
  0 siblings, 0 replies; 9+ messages in thread
From: Yonghong Song @ 2020-08-03  6:21 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann, Kernel Team



On 8/2/20 10:11 PM, Andrii Nakryiko wrote:
> On Sun, Aug 2, 2020 at 7:23 PM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 8/2/20 6:25 PM, Andrii Nakryiko wrote:
>>> On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@fb.com> wrote:
>>>>
>>>> Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
>>>> map elements") added bpf iterator support for
>>>> map elements. The map element bpf iterator requires
>>>> info to identify a particular map. In the above
>>>> commit, the attr->link_create.target_fd is used
>>>> to carry map_fd and an enum bpf_iter_link_info
>>>> is added to uapi to specify the target_fd actually
>>>> representing a map_fd:
>>>>       enum bpf_iter_link_info {
>>>>           BPF_ITER_LINK_UNSPEC = 0,
>>>>           BPF_ITER_LINK_MAP_FD = 1,
>>>>
>>>>           MAX_BPF_ITER_LINK_INFO,
>>>>       };
>>>>
>>>> This is an extensible approach as we can grow
>>>> enumerator for pid, cgroup_id, etc. and we can
>>>> unionize target_fd for pid, cgroup_id, etc.
>>>> But in the future, there are chances that
>>>> more complex customization may happen, e.g.,
>>>> for tasks, it could be filtered based on
>>>> both cgroup_id and user_id.
>>>>
>>>> This patch changed the uapi to have fields
>>>>           __aligned_u64   iter_info;
>>>>           __u32           iter_info_len;
>>>> for additional iter_info for link_create.
>>>> The iter_info is defined as
>>>>           union bpf_iter_link_info {
>>>>                   struct {
>>>>                           __u32   map_fd;
>>>>                   } map;
>>>>           };
>>>>
>>>> So future extension for additional customization
>>>> will be easier. The bpf_iter_link_info will be
>>>> passed to target callback to validate and generic
>>>> bpf_iter framework does not need to deal it any
>>>> more.
>>>>
>>>> Signed-off-by: Yonghong Song <yhs@fb.com>
>>>> ---
>>>>    include/linux/bpf.h            | 10 ++++---
>>>>    include/uapi/linux/bpf.h       | 15 +++++-----
>>>>    kernel/bpf/bpf_iter.c          | 52 +++++++++++++++-------------------
>>>>    kernel/bpf/map_iter.c          | 37 ++++++++++++++++++------
>>>>    kernel/bpf/syscall.c           |  2 +-
>>>>    net/core/bpf_sk_storage.c      | 37 ++++++++++++++++++------
>>>>    tools/include/uapi/linux/bpf.h | 15 +++++-----
>>>>    7 files changed, 104 insertions(+), 64 deletions(-)
>>>>
>>>
>>> [...]
>>>
>>>>    int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>>>    {
>>>> +       union bpf_iter_link_info __user *ulinfo;
>>>>           struct bpf_link_primer link_primer;
>>>>           struct bpf_iter_target_info *tinfo;
>>>> -       struct bpf_iter_aux_info aux = {};
>>>> +       union bpf_iter_link_info linfo;
>>>>           struct bpf_iter_link *link;
>>>> -       u32 prog_btf_id, target_fd;
>>>> +       u32 prog_btf_id, linfo_len;
>>>>           bool existed = false;
>>>> -       struct bpf_map *map;
>>>>           int err;
>>>>
>>>> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
>>>> +
>>>> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
>>>> +       linfo_len = attr->link_create.iter_info_len;
>>>> +       if (ulinfo && linfo_len) {
>>>
>>> We probably want to be more strict here: if either pointer or len is
>>> non-zero, both should be present and valid. Otherwise we can have
>>> garbage in iter_info, as long as iter_info_len is zero.
>>
>> yes, it is possible iter_info_len = 0 and iter_info is not null and
>> if this happens, iter_info will not be examined.
>>
>> in kernel, we have places this is handled similarly. For example,
>> for cgroup bpf_prog query.
>>
>> kernel/bpf/cgroup.c, function __cgroup_bpf_query
>>
>>     __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
>>     ...
>>     if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
>>       return 0;
>>
>> In the above case, it is possible prog_cnt = 0 and prog_ids != NULL,
>> or prog_ids == NULL and prog_cnt != 0, and we won't return error
>> to user space.
>>
>> Not 100% sure whether we have convention here or not.
> 
> I don't know either, but I'd assume that we didn't think about 100%
> strictness when originally implementing this. So I'd go with a very
> strict check for this new functionality.

Agreed. This should be fine as the functionality is new.

> 
>>
>>>
>>>> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
>>>> +                                              linfo_len);
>>>> +               if (err)
>>>> +                       return err;
>>>> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
>>>> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
>>>> +                       return -EFAULT;
>>>> +       }
>>>> +
>>>>           prog_btf_id = prog->aux->attach_btf_id;
>>>>           mutex_lock(&targets_mutex);
>>>>           list_for_each_entry(tinfo, &targets, list) {
>>>> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
>>>>           if (!existed)
>>>>                   return -ENOENT;
>>>>
>>>> -       /* Make sure user supplied flags are target expected. */
>>>> -       target_fd = attr->link_create.target_fd;
>>>> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
>>>> -               return -EINVAL;
>>>> -       if (!attr->link_create.flags && target_fd)
>>>> -               return -EINVAL;
>>>> -
>>>
>>> Please still ensure that no flags are specified.
>>
>> Make sense. I also need to ensure target_fd is 0 since it is not used
>> any more.
>>
> 
> yep, good catch
> 
>>>
>>>
>>>>           link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
>>>>           if (!link)
>>>>                   return -ENOMEM;
[...]
