All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yonghong Song <yhs@fb.com>
To: Andrii Nakryiko <andriin@fb.com>, <bpf@vger.kernel.org>,
	Martin KaFai Lau <kafai@fb.com>, <netdev@vger.kernel.org>
Cc: Alexei Starovoitov <ast@fb.com>,
	Daniel Borkmann <daniel@iogearbox.net>, <kernel-team@fb.com>
Subject: [PATCH bpf-next v3 06/21] bpf: create anonymous bpf iterator
Date: Wed, 6 May 2020 22:39:21 -0700	[thread overview]
Message-ID: <20200507053921.1542958-1-yhs@fb.com> (raw)
In-Reply-To: <20200507053915.1542140-1-yhs@fb.com>

A new bpf command BPF_ITER_CREATE is added.

The anonymous bpf iterator is seq_file based.
The seq_file private data are referenced by targets.
The bpf_iter infrastructure allocates additional space
at seq_file->private, before the space used by targets,
to store some metadata, e.g.,
  prog:       prog to run
  session_id: a unique id for each opened seq_file
  seq_num:    how many times bpf programs are queried in this session
  done_stop:  an internal state to decide whether bpf program
              should be called in seq_ops->stop() or not

The seq_num will start from 0 for valid objects.
The bpf program may see the same seq_num more than once if
 - seq_file buffer overflow happens and the same object
   is retried by bpf_seq_read(), or
 - the bpf program explicitly requests a retry of the
   same object

Since modules are not supported for bpf_iter, all target
registration happens at __init time, so there is no
need to change bpf_iter_unreg_target(), as it is used
mostly in the error path of the init function, at which time
no bpf iterators have been created yet.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            |   1 +
 include/uapi/linux/bpf.h       |   6 ++
 kernel/bpf/bpf_iter.c          | 129 +++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  26 +++++++
 tools/include/uapi/linux/bpf.h |   6 ++
 5 files changed, 168 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e93d2d33c82c..80b1b9d8a638 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1144,6 +1144,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int bpf_iter_new_fd(struct bpf_link *link);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2bf33979f9ae..97ceb0f2e539 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_cmd {
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
 	BPF_ENABLE_STATS,
+	BPF_ITER_CREATE,
 };
 
 enum bpf_map_type {
@@ -614,6 +615,11 @@ union bpf_attr {
 		__u32		type;
 	} enable_stats;
 
+	struct { /* struct used by BPF_ITER_CREATE command */
+		__u32		link_fd;
+		__u32		flags;
+	} iter_create;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index f198597b0ea4..917df4c69966 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 
 #include <linux/fs.h>
+#include <linux/anon_inodes.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 
@@ -20,12 +21,24 @@ struct bpf_iter_link {
 	struct bpf_iter_target_info *tinfo;
 };
 
+struct bpf_iter_priv_data {
+	struct bpf_iter_target_info *tinfo;
+	struct bpf_prog *prog;
+	u64 session_id;
+	u64 seq_num;
+	bool done_stop;
+	u8 target_private[] __aligned(8);
+};
+
 static struct list_head targets = LIST_HEAD_INIT(targets);
 static DEFINE_MUTEX(targets_mutex);
 
 /* protect bpf_iter_link changes */
 static DEFINE_MUTEX(link_mutex);
 
+/* incremented on every opened seq_file */
+static atomic64_t session_id;
+
 /* bpf_seq_read, a customized and simpler version for bpf iterator.
  * no_llseek is assumed for this file.
  * The following are differences from seq_read():
@@ -144,6 +157,33 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 	return copied;
 }
 
+static int iter_release(struct inode *inode, struct file *file)
+{
+	struct bpf_iter_priv_data *iter_priv;
+	struct seq_file *seq;
+
+	seq = file->private_data;
+	if (!seq)
+		return 0;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+
+	if (iter_priv->tinfo->fini_seq_private)
+		iter_priv->tinfo->fini_seq_private(seq->private);
+
+	bpf_prog_put(iter_priv->prog);
+	seq->private = iter_priv;
+
+	return seq_release_private(inode, file);
+}
+
+static const struct file_operations bpf_iter_fops = {
+	.llseek		= no_llseek,
+	.read		= bpf_seq_read,
+	.release	= iter_release,
+};
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;
@@ -304,3 +344,92 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 
 	return bpf_link_settle(&link_primer);
 }
+
+static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
+			  struct bpf_iter_target_info *tinfo,
+			  struct bpf_prog *prog)
+{
+	priv_data->tinfo = tinfo;
+	priv_data->prog = prog;
+	priv_data->session_id = atomic64_inc_return(&session_id);
+	priv_data->seq_num = 0;
+	priv_data->done_stop = false;
+}
+
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
+{
+	struct bpf_iter_priv_data *priv_data;
+	struct bpf_iter_target_info *tinfo;
+	struct bpf_prog *prog;
+	u32 total_priv_dsize;
+	struct seq_file *seq;
+	int err = 0;
+
+	mutex_lock(&link_mutex);
+	prog = link->link.prog;
+	bpf_prog_inc(prog);
+	mutex_unlock(&link_mutex);
+
+	tinfo = link->tinfo;
+	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
+			   tinfo->seq_priv_size;
+	priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
+	if (!priv_data) {
+		err = -ENOMEM;
+		goto release_prog;
+	}
+
+	if (tinfo->init_seq_private) {
+		err = tinfo->init_seq_private(priv_data->target_private);
+		if (err)
+			goto release_seq_file;
+	}
+
+	init_seq_meta(priv_data, tinfo, prog);
+	seq = file->private_data;
+	seq->private = priv_data->target_private;
+
+	return 0;
+
+release_seq_file:
+	seq_release_private(file->f_inode, file);
+	file->private_data = NULL;
+release_prog:
+	bpf_prog_put(prog);
+	return err;
+}
+
+int bpf_iter_new_fd(struct bpf_link *link)
+{
+	struct file *file;
+	unsigned int flags;
+	int err, fd;
+
+	if (link->ops != &bpf_iter_link_lops)
+		return -EINVAL;
+
+	flags = O_RDONLY | O_CLOEXEC;
+	fd = get_unused_fd_flags(flags);
+	if (fd < 0)
+		return fd;
+
+	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto free_fd;
+	}
+
+	err = prepare_seq_file(file,
+			       container_of(link, struct bpf_iter_link, link));
+	if (err)
+		goto free_file;
+
+	fd_install(fd, file);
+	return fd;
+
+free_file:
+	fput(file);
+free_fd:
+	put_unused_fd(fd);
+	return err;
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6ffe2d8fb6c7..a293e88ee01a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3941,6 +3941,29 @@ static int bpf_enable_stats(union bpf_attr *attr)
 	return -EINVAL;
 }
 
+#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
+
+static int bpf_iter_create(union bpf_attr *attr)
+{
+	struct bpf_link *link;
+	int err;
+
+	if (CHECK_ATTR(BPF_ITER_CREATE))
+		return -EINVAL;
+
+	if (attr->iter_create.flags)
+		return -EINVAL;
+
+	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	err = bpf_iter_new_fd(link);
+	bpf_link_put(link);
+
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr;
@@ -4068,6 +4091,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_ENABLE_STATS:
 		err = bpf_enable_stats(&attr);
 		break;
+	case BPF_ITER_CREATE:
+		err = bpf_iter_create(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2bf33979f9ae..97ceb0f2e539 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_cmd {
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
 	BPF_ENABLE_STATS,
+	BPF_ITER_CREATE,
 };
 
 enum bpf_map_type {
@@ -614,6 +615,11 @@ union bpf_attr {
 		__u32		type;
 	} enable_stats;
 
+	struct { /* struct used by BPF_ITER_CREATE command */
+		__u32		link_fd;
+		__u32		flags;
+	} iter_create;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
-- 
2.24.1


  parent reply	other threads:[~2020-05-07  5:39 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-07  5:39 [PATCH bpf-next v3 00/21] bpf: implement bpf iterator for kernel data Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 01/21] bpf: implement an interface to register bpf_iter targets Yonghong Song
2020-05-08 18:18   ` Andrii Nakryiko
2020-05-07  5:39 ` [PATCH bpf-next v3 02/21] bpf: allow loading of a bpf_iter program Yonghong Song
2020-05-08 18:20   ` Andrii Nakryiko
2020-05-07  5:39 ` [PATCH bpf-next v3 03/21] bpf: support bpf tracing/iter programs for BPF_LINK_CREATE Yonghong Song
2020-05-08 18:24   ` Andrii Nakryiko
2020-05-09  1:36     ` Yonghong Song
2020-05-12  3:15       ` Andrii Nakryiko
2020-05-13 16:57         ` Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 04/21] bpf: support bpf tracing/iter programs for BPF_LINK_UPDATE Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 05/21] bpf: implement bpf_seq_read() for bpf iterator Yonghong Song
2020-05-08 18:52   ` Andrii Nakryiko
2020-05-09  1:41     ` Yonghong Song
2020-05-07  5:39 ` Yonghong Song [this message]
2020-05-08 18:57   ` [PATCH bpf-next v3 06/21] bpf: create anonymous " Andrii Nakryiko
2020-05-07  5:39 ` [PATCH bpf-next v3 07/21] bpf: create file " Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 08/21] bpf: implement common macros/helpers for target iterators Yonghong Song
2020-05-08 19:07   ` Andrii Nakryiko
2020-05-09  3:18     ` Yonghong Song
2020-05-12  3:16       ` Andrii Nakryiko
2020-05-07  5:39 ` [PATCH bpf-next v3 09/21] bpf: add bpf_map iterator Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 10/21] net: bpf: add netlink and ipv6_route bpf_iter targets Yonghong Song
2020-05-08 19:17   ` Andrii Nakryiko
2020-05-07  5:39 ` [PATCH bpf-next v3 11/21] bpf: add task and task/file iterator targets Yonghong Song
2020-05-08 19:36   ` Andrii Nakryiko
2020-05-07  5:39 ` [PATCH bpf-next v3 12/21] bpf: add PTR_TO_BTF_ID_OR_NULL support Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 13/21] bpf: add bpf_seq_printf and bpf_seq_write helpers Yonghong Song
2020-05-08 19:44   ` Andrii Nakryiko
2020-05-09  4:18     ` Yonghong Song
2020-05-09  5:30       ` Alexei Starovoitov
2020-05-09  6:04         ` Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 14/21] bpf: handle spilled PTR_TO_BTF_ID properly when checking stack_boundary Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 15/21] bpf: support variable length array in tracing programs Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 16/21] tools/libbpf: add bpf_iter support Yonghong Song
2020-05-08 19:46   ` Andrii Nakryiko
2020-05-07  5:39 ` [PATCH bpf-next v3 17/21] tools/libpf: add offsetof/container_of macro in bpf_helpers.h Yonghong Song
2020-05-08 19:48   ` Andrii Nakryiko
2020-05-07  5:39 ` [PATCH bpf-next v3 18/21] tools/bpftool: add bpf_iter support for bptool Yonghong Song
2020-05-08 19:51   ` Andrii Nakryiko
2020-05-09  5:26     ` Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 19/21] tools/bpf: selftests: add iterator programs for ipv6_route and netlink Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 20/21] tools/bpf: selftests: add iter progs for bpf_map/task/task_file Yonghong Song
2020-05-07  5:39 ` [PATCH bpf-next v3 21/21] tools/bpf: selftests: add bpf_iter selftests Yonghong Song
2020-05-08 19:57   ` Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200507053921.1542958-1-yhs@fb.com \
    --to=yhs@fb.com \
    --cc=andriin@fb.com \
    --cc=ast@fb.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kafai@fb.com \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.