* [PATCH bpf-next v3 1/2] bpf: change uapi for bpf iterator map elements
  2020-08-03 22:43 [PATCH bpf-next v3 0/2] bpf: change uapi for bpf iterator map elements Yonghong Song
@ 2020-08-03 22:43 ` Yonghong Song
  2020-08-04  0:37   ` John Fastabend
  2020-08-03 22:43 ` [PATCH bpf-next v3 2/2] tools/bpf: support new uapi for map element bpf iterator Yonghong Song
  2020-08-03 23:10 ` [PATCH bpf-next v3 0/2] bpf: change uapi for bpf iterator map elements Andrii Nakryiko
  2 siblings, 1 reply; 6+ messages in thread
From: Yonghong Song @ 2020-08-03 22:43 UTC (permalink / raw)
  To: bpf, netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
map elements") added bpf iterator support for
map elements. The map element bpf iterator requires
info to identify a particular map. In the above
commit, attr->link_create.target_fd is used to
carry the map_fd, and an enum bpf_iter_link_info
is added to the uapi to specify that target_fd
actually represents a map_fd:
    enum bpf_iter_link_info {
	BPF_ITER_LINK_UNSPEC = 0,
	BPF_ITER_LINK_MAP_FD = 1,

	MAX_BPF_ITER_LINK_INFO,
    };

This approach is extensible: the enumerator can grow
to cover pid, cgroup_id, etc., and target_fd can be
treated as a union carrying a pid, cgroup_id, etc.
But more complex customization may be needed in the
future, e.g., task iterators might be filtered based
on both cgroup_id and user_id.

This patch changes the uapi so that link_create can
carry additional iterator info in two new fields:
	__aligned_u64	iter_info;
	__u32		iter_info_len;
The iter_info is defined as
	union bpf_iter_link_info {
		struct {
			__u32   map_fd;
		} map;
	};

This makes future extension for additional
customization easier. The bpf_iter_link_info is
passed to the target's attach callback for
validation, so the generic bpf_iter framework no
longer needs to deal with it.

Note that map_fd = 0 will be considered invalid
and -EBADF will be returned to user space.
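
As an illustration only (not part of this patch), user space might
fill in the new fields roughly as follows, assuming prog_fd is an
already loaded bpf_map_elem iterator program and map_fd is the map
to be iterated:
	union bpf_iter_link_info linfo;
	union bpf_attr attr;
	int link_fd;

	memset(&linfo, 0, sizeof(linfo));
	linfo.map.map_fd = map_fd;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.attach_type = BPF_TRACE_ITER;
	attr.link_create.iter_info = (__u64)(unsigned long)&linfo;
	attr.link_create.iter_info_len = sizeof(linfo);

	/* returns a link fd on success, a negative error otherwise */
	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));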

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h       | 10 ++++---
 include/uapi/linux/bpf.h  | 15 +++++-----
 kernel/bpf/bpf_iter.c     | 58 +++++++++++++++++++--------------------
 kernel/bpf/map_iter.c     | 37 +++++++++++++++++++------
 kernel/bpf/syscall.c      |  2 +-
 net/core/bpf_sk_storage.c | 37 +++++++++++++++++++------
 6 files changed, 102 insertions(+), 57 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cef4ef0d2b4e..55f694b63164 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info {
 	struct bpf_map *map;
 };
 
-typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
-				       struct bpf_iter_aux_info *aux);
+typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
+					union bpf_iter_link_info *linfo,
+					struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
 
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
-	bpf_iter_check_target_t check_target;
+	bpf_iter_attach_target_t attach_target;
+	bpf_iter_detach_target_t detach_target;
 	u32 ctx_arg_info_size;
-	enum bpf_iter_link_info req_linfo;
 	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 	const struct bpf_iter_seq_info *seq_info;
 };
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b134e679e9db..0480f893facd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type */
 };
 
+union bpf_iter_link_info {
+	struct {
+		__u32	map_fd;
+	} map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-	BPF_ITER_LINK_UNSPEC = 0,
-	BPF_ITER_LINK_MAP_FD = 1,
-
-	MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
+		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+		__u32		iter_info_len;	/* iter_info length */
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 363b9cafc2d8..b6715964b685 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link)
 	struct bpf_iter_link *iter_link =
 		container_of(link, struct bpf_iter_link, link);
 
-	if (iter_link->aux.map)
-		bpf_map_put_with_uref(iter_link->aux.map);
+	if (iter_link->tinfo->reg_info->detach_target)
+		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
 }
 
 static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link)
 
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
+	union bpf_iter_link_info __user *ulinfo;
 	struct bpf_link_primer link_primer;
 	struct bpf_iter_target_info *tinfo;
-	struct bpf_iter_aux_info aux = {};
+	union bpf_iter_link_info linfo;
 	struct bpf_iter_link *link;
-	u32 prog_btf_id, target_fd;
+	u32 prog_btf_id, linfo_len;
 	bool existed = false;
-	struct bpf_map *map;
 	int err;
 
+	if (attr->link_create.target_fd || attr->link_create.flags)
+		return -EINVAL;
+
+	memset(&linfo, 0, sizeof(union bpf_iter_link_info));
+
+	ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
+	linfo_len = attr->link_create.iter_info_len;
+	if (!ulinfo ^ !linfo_len)
+		return -EINVAL;
+
+	if (ulinfo) {
+		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
+					       linfo_len);
+		if (err)
+			return err;
+		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
+		if (copy_from_user(&linfo, ulinfo, linfo_len))
+			return -EFAULT;
+	}
+
 	prog_btf_id = prog->aux->attach_btf_id;
 	mutex_lock(&targets_mutex);
 	list_for_each_entry(tinfo, &targets, list) {
@@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	if (!existed)
 		return -ENOENT;
 
-	/* Make sure user supplied flags are target expected. */
-	target_fd = attr->link_create.target_fd;
-	if (attr->link_create.flags != tinfo->reg_info->req_linfo)
-		return -EINVAL;
-	if (!attr->link_create.flags && target_fd)
-		return -EINVAL;
-
 	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
 	if (!link)
 		return -ENOMEM;
@@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 		return err;
 	}
 
-	if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
-		map = bpf_map_get_with_uref(target_fd);
-		if (IS_ERR(map)) {
-			err = PTR_ERR(map);
-			goto cleanup_link;
-		}
-
-		aux.map = map;
-		err = tinfo->reg_info->check_target(prog, &aux);
+	if (tinfo->reg_info->attach_target) {
+		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
 		if (err) {
-			bpf_map_put_with_uref(map);
-			goto cleanup_link;
+			bpf_link_cleanup(&link_primer);
+			return err;
 		}
-
-		link->aux.map = map;
 	}
 
 	return bpf_link_settle(&link_primer);
-
-cleanup_link:
-	bpf_link_cleanup(&link_primer);
-	return err;
 }
 
 static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index fbe1f557cb88..af86048e5afd 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = {
 	.seq_info		= &bpf_map_seq_info,
 };
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-			      struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+			       union bpf_iter_link_info *linfo,
+			       struct bpf_iter_aux_info *aux)
 {
 	u32 key_acc_size, value_acc_size, key_size, value_size;
-	struct bpf_map *map = aux->map;
+	struct bpf_map *map;
 	bool is_percpu = false;
+	int err = -EINVAL;
+
+	if (!linfo->map.map_fd)
+		return -EBADF;
+
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
 	else if (map->map_type != BPF_MAP_TYPE_HASH &&
 		 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
 		 map->map_type != BPF_MAP_TYPE_ARRAY)
-		return -EINVAL;
+		goto put_map;
 
 	key_acc_size = prog->aux->max_rdonly_access;
 	value_acc_size = prog->aux->max_rdwr_access;
@@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
 	else
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 
-	if (key_acc_size > key_size || value_acc_size > value_size)
-		return -EACCES;
+	if (key_acc_size > key_size || value_acc_size > value_size) {
+		err = -EACCES;
+		goto put_map;
+	}
 
+	aux->map = map;
 	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
 }
 
 DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
@@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
 
 static const struct bpf_iter_reg bpf_map_elem_reg_info = {
 	.target			= "bpf_map_elem",
-	.check_target		= bpf_iter_check_map,
-	.req_linfo		= BPF_ITER_LINK_MAP_FD,
+	.attach_target		= bpf_iter_attach_map,
+	.detach_target		= bpf_iter_detach_map,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_map_elem, key),
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2f343ce15747..86299a292214 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
 	return -EINVAL;
 }
 
-#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
+#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
 static int link_create(union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d3377c90a291..b988f48153a4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
 	return 0;
 }
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-			      struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+			       union bpf_iter_link_info *linfo,
+			       struct bpf_iter_aux_info *aux)
 {
-	struct bpf_map *map = aux->map;
+	struct bpf_map *map;
+	int err = -EINVAL;
+
+	if (!linfo->map.map_fd)
+		return -EBADF;
+
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
 	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
-		return -EINVAL;
+		goto put_map;
 
-	if (prog->aux->max_rdonly_access > map->value_size)
-		return -EACCES;
+	if (prog->aux->max_rdonly_access > map->value_size) {
+		err = -EACCES;
+		goto put_map;
+	}
 
+	aux->map = map;
 	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
 }
 
 static const struct seq_operations bpf_sk_storage_map_seq_ops = {
@@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 
 static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
 	.target			= "bpf_sk_storage_map",
-	.check_target		= bpf_iter_check_map,
-	.req_linfo		= BPF_ITER_LINK_MAP_FD,
+	.attach_target		= bpf_iter_attach_map,
+	.detach_target		= bpf_iter_detach_map,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
-- 
2.24.1



* [PATCH bpf-next v3 0/2] bpf: change uapi for bpf iterator map elements
@ 2020-08-03 22:43 Yonghong Song
  2020-08-03 22:43 ` [PATCH bpf-next v3 1/2] " Yonghong Song
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Yonghong Song @ 2020-08-03 22:43 UTC (permalink / raw)
  To: bpf, netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

Andrii raised a concern that the current uapi for the bpf iterator map
element is a little restrictive and not suitable for potential future
complex customization. This is a valid suggestion, considering people
may indeed add more complex customization to the iterator, e.g.,
cgroup_id + user_id, etc. for task or task_file. Another example might
be map_id plus additional control so that the bpf iterator can bail
out of a bucket early if the bucket has too many elements, which may
hold a lock too long and impact other parts of the system.
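
As a purely hypothetical illustration (not something proposed here),
such a task filter could be expressed by extending the
bpf_iter_link_info union introduced in patch #1:
	union bpf_iter_link_info {
		struct {
			__u32	map_fd;
		} map;
		struct {	/* hypothetical, for illustration only */
			__u64	cgroup_id;
			__u32	user_id;
		} task;
	};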

Patch #1 modifies the uapi along with the corresponding kernel
changes. Patch #2 adjusts the libbpf API accordingly.

Changelogs:
  v2 -> v3:
    . undo "not reject iter_info.map.map_fd == 0" from v1.
      In the future, map_fd may become optional, so use map_fd == 0 to
      indicate that map_fd is not set by user space.
    . add link_info_len to bpf_iter_attach_opts to ensure the
      link_info_len from the user is always correct. Otherwise, libbpf
      may deduce an incorrect link_info_len if it uses a different uapi
      header than the user app.
  v1 -> v2:
    . ensure link_create target_fd/flags == 0 since they are not used. (Andrii)
    . if either the iter_info ptr or iter_info_len is 0, but not both,
      return an error to user space. (Andrii)
    . do not reject iter_info.map.map_fd == 0; go ahead and use it to try
      to get a map reference, since the map_fd is required for the
      map_elem iterator.
    . use bpf_iter_link_info in bpf_iter_attach_opts instead of map_fd.
      This way, user space is responsible for setting up bpf_iter_link_info
      and libbpf just passes the data to the kernel, simplifying the
      libbpf design. (Andrii)

Yonghong Song (2):
  bpf: change uapi for bpf iterator map elements
  tools/bpf: support new uapi for map element bpf iterator

 include/linux/bpf.h                           | 10 ++--
 include/uapi/linux/bpf.h                      | 15 ++---
 kernel/bpf/bpf_iter.c                         | 58 +++++++++----------
 kernel/bpf/map_iter.c                         | 37 +++++++++---
 kernel/bpf/syscall.c                          |  2 +-
 net/core/bpf_sk_storage.c                     | 37 +++++++++---
 tools/bpf/bpftool/iter.c                      |  9 ++-
 tools/include/uapi/linux/bpf.h                | 15 ++---
 tools/lib/bpf/bpf.c                           |  3 +
 tools/lib/bpf/bpf.h                           |  4 +-
 tools/lib/bpf/libbpf.c                        |  6 +-
 tools/lib/bpf/libbpf.h                        |  5 +-
 .../selftests/bpf/prog_tests/bpf_iter.c       | 40 ++++++++++---
 13 files changed, 159 insertions(+), 82 deletions(-)

-- 
2.24.1



* [PATCH bpf-next v3 2/2] tools/bpf: support new uapi for map element bpf iterator
  2020-08-03 22:43 [PATCH bpf-next v3 0/2] bpf: change uapi for bpf iterator map elements Yonghong Song
  2020-08-03 22:43 ` [PATCH bpf-next v3 1/2] " Yonghong Song
@ 2020-08-03 22:43 ` Yonghong Song
  2020-08-04  0:39   ` John Fastabend
  2020-08-03 23:10 ` [PATCH bpf-next v3 0/2] bpf: change uapi for bpf iterator map elements Andrii Nakryiko
  2 siblings, 1 reply; 6+ messages in thread
From: Yonghong Song @ 2020-08-03 22:43 UTC (permalink / raw)
  To: bpf, netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

The previous commit adjusted the kernel uapi for the
map element bpf iterator. This patch adjusts the libbpf
API to match the uapi change. bpftool and the bpf_iter
selftests are updated accordingly.
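
With the new interface, attaching a map element iterator from user
space looks roughly like the following sketch (mirroring the updated
selftests; prog and map_fd are assumed to be a loaded iterator
program and a valid map fd):
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
	union bpf_iter_link_info linfo;
	struct bpf_link *link;

	memset(&linfo, 0, sizeof(linfo));
	linfo.map.map_fd = map_fd;
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	link = bpf_program__attach_iter(prog, &opts);
	if (IS_ERR(link))
		return PTR_ERR(link);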

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/bpf/bpftool/iter.c                      |  9 +++--
 tools/include/uapi/linux/bpf.h                | 15 +++----
 tools/lib/bpf/bpf.c                           |  3 ++
 tools/lib/bpf/bpf.h                           |  4 +-
 tools/lib/bpf/libbpf.c                        |  6 +--
 tools/lib/bpf/libbpf.h                        |  5 ++-
 .../selftests/bpf/prog_tests/bpf_iter.c       | 40 +++++++++++++++----
 7 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index c9dba7543dba..3b1aad7535dd 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -11,6 +11,7 @@
 static int do_pin(int argc, char **argv)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
+	union bpf_iter_link_info linfo;
 	const char *objfile, *path;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
@@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv)
 			map_fd = map_parse_fd(&argc, &argv);
 			if (map_fd < 0)
 				return -1;
+
+			memset(&linfo, 0, sizeof(linfo));
+			linfo.map.map_fd = map_fd;
+			iter_opts.link_info = &linfo;
+			iter_opts.link_info_len = sizeof(linfo);
 		}
 	}
 
@@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv)
 		goto close_obj;
 	}
 
-	if (map_fd >= 0)
-		iter_opts.map_fd = map_fd;
-
 	link = bpf_program__attach_iter(prog, &iter_opts);
 	if (IS_ERR(link)) {
 		err = PTR_ERR(link);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b134e679e9db..0480f893facd 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type */
 };
 
+union bpf_iter_link_info {
+	struct {
+		__u32	map_fd;
+	} map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-	BPF_ITER_LINK_UNSPEC = 0,
-	BPF_ITER_LINK_MAP_FD = 1,
-
-	MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
+		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+		__u32		iter_info_len;	/* iter_info length */
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index eab14c97c15d..0750681057c2 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd,
 	attr.link_create.target_fd = target_fd;
 	attr.link_create.attach_type = attach_type;
 	attr.link_create.flags = OPTS_GET(opts, flags, 0);
+	attr.link_create.iter_info =
+		ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+	attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
 
 	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
 }
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 28855fd5b5f4..832c7615af80 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -171,8 +171,10 @@ LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 struct bpf_link_create_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 	__u32 flags;
+	union bpf_iter_link_info *iter_info;
+	__u32 iter_info_len;
 };
-#define bpf_link_create_opts__last_field flags
+#define bpf_link_create_opts__last_field iter_info_len
 
 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
 			       enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7be04e45d29c..0a06124f7999 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8306,10 +8306,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
 	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
 		return ERR_PTR(-EINVAL);
 
-	if (OPTS_HAS(opts, map_fd)) {
-		target_fd = opts->map_fd;
-		link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
-	}
+	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
+	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
 
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3ed1399bfbbc..5ecb4069a9f0 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
 
 struct bpf_iter_attach_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
-	__u32 map_fd;
+	union bpf_iter_link_info *link_info;
+	__u32 link_info_len;
 };
-#define bpf_iter_attach_opts__last_field map_fd
+#define bpf_iter_attach_opts__last_field link_info_len
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach_iter(struct bpf_program *prog,
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 4ffefdc1130f..7375d9a6d242 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -468,6 +468,7 @@ static void test_bpf_hash_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_hash_map *skel;
 	int err, i, len, map_fd, iter_fd;
+	union bpf_iter_link_info linfo;
 	__u64 val, expected_val = 0;
 	struct bpf_link *link;
 	struct key_t {
@@ -490,13 +491,16 @@ static void test_bpf_hash_map(void)
 		goto out;
 
 	/* iterator with hashmap2 and hashmap3 should fail */
-	opts.map_fd = bpf_map__fd(skel->maps.hashmap2);
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2);
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (CHECK(!IS_ERR(link), "attach_iter",
 		  "attach_iter for hashmap2 unexpected succeeded\n"))
 		goto out;
 
-	opts.map_fd = bpf_map__fd(skel->maps.hashmap3);
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (CHECK(!IS_ERR(link), "attach_iter",
 		  "attach_iter for hashmap3 unexpected succeeded\n"))
@@ -519,7 +523,7 @@ static void test_bpf_hash_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	linfo.map.map_fd = map_fd;
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_percpu_hash_map *skel;
 	int err, i, j, len, map_fd, iter_fd;
+	union bpf_iter_link_info linfo;
 	__u32 expected_val = 0;
 	struct bpf_link *link;
 	struct key_t {
@@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -649,6 +657,7 @@ static void test_bpf_array_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	__u32 expected_key = 0, res_first_key;
 	struct bpf_iter_bpf_array_map *skel;
+	union bpf_iter_link_info linfo;
 	int err, i, map_fd, iter_fd;
 	struct bpf_link *link;
 	char buf[64] = {};
@@ -673,7 +682,10 @@ static void test_bpf_array_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_percpu_array_map *skel;
 	__u32 expected_key = 0, expected_val = 0;
+	union bpf_iter_link_info linfo;
 	int err, i, j, map_fd, iter_fd;
 	struct bpf_link *link;
 	char buf[64];
@@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void)
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	int err, i, len, map_fd, iter_fd, num_sockets;
 	struct bpf_iter_bpf_sk_storage_map *skel;
+	union bpf_iter_link_info linfo;
 	int sock_fd[3] = {-1, -1, -1};
 	__u32 val, expected_val = 0;
 	struct bpf_link *link;
@@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void)
 			goto out;
 	}
 
-	opts.map_fd = map_fd;
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
 	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
 		goto out;
@@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_test_kern5 *skel;
+	union bpf_iter_link_info linfo;
 	struct bpf_link *link;
 
 	skel = bpf_iter_test_kern5__open_and_load();
@@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void)
 		  "skeleton open_and_load failed\n"))
 		return;
 
-	opts.map_fd = bpf_map__fd(skel->maps.hashmap1);
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1);
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
 	if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
 		bpf_link__destroy(link);
-- 
2.24.1



* Re: [PATCH bpf-next v3 0/2] bpf: change uapi for bpf iterator map elements
  2020-08-03 22:43 [PATCH bpf-next v3 0/2] bpf: change uapi for bpf iterator map elements Yonghong Song
  2020-08-03 22:43 ` [PATCH bpf-next v3 1/2] " Yonghong Song
  2020-08-03 22:43 ` [PATCH bpf-next v3 2/2] tools/bpf: support new uapi for map element bpf iterator Yonghong Song
@ 2020-08-03 23:10 ` Andrii Nakryiko
  2 siblings, 0 replies; 6+ messages in thread
From: Andrii Nakryiko @ 2020-08-03 23:10 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann, Kernel Team

On Mon, Aug 3, 2020 at 3:44 PM Yonghong Song <yhs@fb.com> wrote:
>
> Andrii raised a concern that the current uapi for the bpf iterator map
> element is a little restrictive and not suitable for potential future
> complex customization. This is a valid suggestion, considering people
> may indeed add more complex customization to the iterator, e.g.,
> cgroup_id + user_id, etc. for task or task_file. Another example might
> be map_id plus additional control so that the bpf iterator can bail
> out of a bucket early if the bucket has too many elements, which may
> hold a lock too long and impact other parts of the system.
>
> Patch #1 modifies the uapi along with the corresponding kernel
> changes. Patch #2 adjusts the libbpf API accordingly.
>
> Changelogs:
>   v2 -> v3:
>     . undo "not reject iter_info.map.map_fd == 0" from v1.
>       In the future, map_fd may become optional, so use map_fd == 0 to
>       indicate that map_fd is not set by user space.
>     . add link_info_len to bpf_iter_attach_opts to ensure the
>       link_info_len from the user is always correct. Otherwise, libbpf
>       may deduce an incorrect link_info_len if it uses a different uapi
>       header than the user app.
>   v1 -> v2:
>     . ensure link_create target_fd/flags == 0 since they are not used. (Andrii)
>     . if either the iter_info ptr or iter_info_len is 0, but not both,
>       return an error to user space. (Andrii)
>     . do not reject iter_info.map.map_fd == 0; go ahead and use it to try
>       to get a map reference, since the map_fd is required for the
>       map_elem iterator.
>     . use bpf_iter_link_info in bpf_iter_attach_opts instead of map_fd.
>       This way, user space is responsible for setting up bpf_iter_link_info
>       and libbpf just passes the data to the kernel, simplifying the
>       libbpf design. (Andrii)
>
> Yonghong Song (2):
>   bpf: change uapi for bpf iterator map elements
>   tools/bpf: support new uapi for map element bpf iterator
>
>  include/linux/bpf.h                           | 10 ++--
>  include/uapi/linux/bpf.h                      | 15 ++---
>  kernel/bpf/bpf_iter.c                         | 58 +++++++++----------
>  kernel/bpf/map_iter.c                         | 37 +++++++++---
>  kernel/bpf/syscall.c                          |  2 +-
>  net/core/bpf_sk_storage.c                     | 37 +++++++++---
>  tools/bpf/bpftool/iter.c                      |  9 ++-
>  tools/include/uapi/linux/bpf.h                | 15 ++---
>  tools/lib/bpf/bpf.c                           |  3 +
>  tools/lib/bpf/bpf.h                           |  4 +-
>  tools/lib/bpf/libbpf.c                        |  6 +-
>  tools/lib/bpf/libbpf.h                        |  5 +-
>  .../selftests/bpf/prog_tests/bpf_iter.c       | 40 ++++++++++---
>  13 files changed, 159 insertions(+), 82 deletions(-)
>
> --
> 2.24.1
>


Looks great, thanks!

Acked-by: Andrii Nakryiko <andriin@fb.com>


* RE: [PATCH bpf-next v3 1/2] bpf: change uapi for bpf iterator map elements
  2020-08-03 22:43 ` [PATCH bpf-next v3 1/2] " Yonghong Song
@ 2020-08-04  0:37   ` John Fastabend
  0 siblings, 0 replies; 6+ messages in thread
From: John Fastabend @ 2020-08-04  0:37 UTC (permalink / raw)
  To: Yonghong Song, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

Yonghong Song wrote:
> Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
> map elements") added bpf iterator support for
> map elements. The map element bpf iterator requires
> info to identify a particular map. In the above
> commit, attr->link_create.target_fd is used to
> carry the map_fd, and an enum bpf_iter_link_info
> is added to the uapi to specify that target_fd
> actually represents a map_fd:
>     enum bpf_iter_link_info {
> 	BPF_ITER_LINK_UNSPEC = 0,
> 	BPF_ITER_LINK_MAP_FD = 1,
> 
> 	MAX_BPF_ITER_LINK_INFO,
>     };
> 
> This approach is extensible: the enumerator can grow
> to cover pid, cgroup_id, etc., and target_fd can be
> treated as a union carrying a pid, cgroup_id, etc.
> But more complex customization may be needed in the
> future, e.g., task iterators might be filtered based
> on both cgroup_id and user_id.
> 
> This patch changes the uapi so that link_create can
> carry additional iterator info in two new fields:
> 	__aligned_u64	iter_info;
> 	__u32		iter_info_len;
> The iter_info is defined as
> 	union bpf_iter_link_info {
> 		struct {
> 			__u32   map_fd;
> 		} map;
> 	};
> 
> This makes future extension for additional
> customization easier. The bpf_iter_link_info is
> passed to the target's attach callback for
> validation, so the generic bpf_iter framework no
> longer needs to deal with it.
> 
> Note that map_fd = 0 will be considered invalid
> and -EBADF will be returned to user space.
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---

LGTM. I had to do some git log research on the latest bpf iter work to
parse the commit message, but I needed to do that anyway.

Acked-by: John Fastabend <john.fastabend@gmail.com>


* RE: [PATCH bpf-next v3 2/2] tools/bpf: support new uapi for map element bpf iterator
  2020-08-03 22:43 ` [PATCH bpf-next v3 2/2] tools/bpf: support new uapi for map element bpf iterator Yonghong Song
@ 2020-08-04  0:39   ` John Fastabend
  0 siblings, 0 replies; 6+ messages in thread
From: John Fastabend @ 2020-08-04  0:39 UTC (permalink / raw)
  To: Yonghong Song, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team

Yonghong Song wrote:
> The previous commit adjusted the kernel uapi for the
> map element bpf iterator. This patch adjusts the libbpf
> API to match the uapi change. bpftool and the bpf_iter
> selftests are updated accordingly.
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---

Acked-by: John Fastabend <john.fastabend@gmail.com>


