* [RFC 0/4] add BPF-driven requests
@ 2021-02-17 12:38 Pavel Begunkov
  2021-02-17 12:38 ` [PATCH 1/4] bpf: add IOURING program type Pavel Begunkov
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Pavel Begunkov @ 2021-02-17 12:38 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Pretty much an RFC for executing BPF requests in io_uring, meant to
start a discussion, so any ideas and wish lists are welcome. Some
technical details that are not important for the discussion are
omitted from the patchset, e.g. a proper cancellation scheme,
optimisations and some BPF-specific parts.

Some of the major missing points:
1. We need to pass the CQE/result of a previous linked request to the
BPF program.

2. Whether to stick with a new IORING_OP_BPF opcode, or hook BPF into
a common path for each request, e.g. on CQE completion. The former
looks saner, but doesn't align nicely with (1).

3. Allow BPF programs to generate CQEs not bound to a request. A
problem here is supporting overflowed CQEs: either always kmalloc()
storage for them instead of using the request's memory, or pile them
up on top. Eventually we will need this anyway to be able to post
several CQEs for a single request.

Pavel Begunkov (4):
  bpf: add IOURING program type
  io_uring: implement bpf prog registration
  io_uring: add IORING_OP_BPF
  io_uring: enable BPF to submit SQEs

 fs/io_uring.c                 | 259 +++++++++++++++++++++++++++++++++-
 include/linux/bpf_types.h     |   2 +
 include/uapi/linux/bpf.h      |   2 +
 include/uapi/linux/io_uring.h |   3 +
 kernel/bpf/syscall.c          |   1 +
 kernel/bpf/verifier.c         |   3 +
 6 files changed, 264 insertions(+), 6 deletions(-)

-- 
2.24.0



* [PATCH 1/4] bpf: add IOURING program type
  2021-02-17 12:38 [RFC 0/4] add BPF-driven requests Pavel Begunkov
@ 2021-02-17 12:38 ` Pavel Begunkov
  2021-02-17 12:38 ` [PATCH 2/4] io_uring: implement bpf prog registration Pavel Begunkov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2021-02-17 12:38 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Draft a new program type BPF_PROG_TYPE_IOURING, which will be used by
io_uring to execute BPF-based requests.
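
Not part of the patch, but for illustration: a minimal userspace
sketch of loading such a program with the raw bpf(2) syscall. The
trivial "return 0" body matches the check_return_code() restriction
added below, and with the is_net_admin_prog_type() change the load
also needs CAP_NET_ADMIN or CAP_SYS_ADMIN. Function and variable
names here are made up:

  #include <linux/bpf.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  /* "r0 = 0; exit" -- the only return value the verifier accepts here */
  static struct bpf_insn insns[] = {
          { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = 0, .imm = 0 },
          { .code = BPF_JMP | BPF_EXIT },
  };

  static int load_iouring_prog(void)
  {
          union bpf_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.prog_type = BPF_PROG_TYPE_IOURING;
          attr.insns = (unsigned long)insns;
          attr.insn_cnt = sizeof(insns) / sizeof(insns[0]);
          attr.license = (unsigned long)"GPL";
          return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  }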

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c             | 21 +++++++++++++++++++++
 include/linux/bpf_types.h |  2 ++
 include/uapi/linux/bpf.h  |  1 +
 kernel/bpf/syscall.c      |  1 +
 kernel/bpf/verifier.c     |  3 +++
 5 files changed, 28 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 61b65edabe5e..2c8904bee386 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -10166,6 +10166,27 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	return ret;
 }
 
+static const struct bpf_func_proto *
+io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id);
+}
+
+static bool io_bpf_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	return false;
+}
+
+const struct bpf_prog_ops bpf_io_uring_prog_ops = {};
+
+const struct bpf_verifier_ops bpf_io_uring_verifier_ops = {
+	.get_func_proto		= io_bpf_func_proto,
+	.is_valid_access	= io_bpf_is_valid_access,
+};
+
 SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 		void __user *, arg, unsigned int, nr_args)
 {
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 99f7fd657d87..d0b7954887bd 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -77,6 +77,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
 	       void *, void *)
 #endif /* CONFIG_BPF_LSM */
 #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_IOURING, bpf_io_uring,
+	      void *, void *)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77d7c1bb2923..2f1c0ab097d8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -200,6 +200,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_EXT,
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
+	BPF_PROG_TYPE_IOURING,
 };
 
 enum bpf_attach_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e5999d86c76e..9b8f6b57fb1b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2031,6 +2031,7 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_IOURING:
 	case BPF_PROG_TYPE_EXT: /* extends any prog */
 		return true;
 	case BPF_PROG_TYPE_CGROUP_SKB:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e7368c5eacb7..54e26586932b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7981,6 +7981,9 @@ static int check_return_code(struct bpf_verifier_env *env)
 	case BPF_PROG_TYPE_SK_LOOKUP:
 		range = tnum_range(SK_DROP, SK_PASS);
 		break;
+	case BPF_PROG_TYPE_IOURING:
+		range = tnum_const(0);
+		break;
 	case BPF_PROG_TYPE_EXT:
 		/* freplace program can return anything as its return value
 		 * depends on the to-be-replaced kernel func or bpf program.
-- 
2.24.0



* [PATCH 2/4] io_uring: implement bpf prog registration
  2021-02-17 12:38 [RFC 0/4] add BPF-driven requests Pavel Begunkov
  2021-02-17 12:38 ` [PATCH 1/4] bpf: add IOURING program type Pavel Begunkov
@ 2021-02-17 12:38 ` Pavel Begunkov
  2021-02-17 12:38 ` [PATCH 3/4] io_uring: add IORING_OP_BPF Pavel Begunkov
  2021-02-17 12:38 ` [PATCH 4/4] io_uring: enable BPF to submit SQEs Pavel Begunkov
  3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2021-02-17 12:38 UTC (permalink / raw)
  To: Jens Axboe, io-uring

[de]register BPF programs through io_uring_register() with new
IORING_ATTACH_BPF and IORING_DETACH_BPF commands.
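
Not part of the patch, but a rough userspace sketch of driving the new
commands (with <linux/io_uring.h>, <sys/syscall.h> and <unistd.h>
included, and assuming ring_fd and prog_fd already exist; there are no
liburing wrappers for these yet). An fd of -1 leaves the corresponding
table slot empty, and IORING_DETACH_BPF takes no arguments and drops
the whole table:

  __u32 fds[2] = { prog_fd, (__u32)-1 };  /* slot 1 left unused */
  int ret;

  ret = syscall(__NR_io_uring_register, ring_fd, IORING_ATTACH_BPF,
                fds, 2);

  /* ... later, drop all registered programs */
  ret = syscall(__NR_io_uring_register, ring_fd, IORING_DETACH_BPF,
                NULL, 0);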

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 80 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  2 +
 2 files changed, 82 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2c8904bee386..524cf1eb1cec 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -82,6 +82,7 @@
 #include <linux/io_uring.h>
 #include <linux/blk-cgroup.h>
 #include <linux/audit.h>
+#include <linux/bpf.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -249,6 +250,10 @@ struct io_restriction {
 	bool registered;
 };
 
+struct io_bpf_prog {
+	struct bpf_prog *prog;
+};
+
 struct io_sq_data {
 	refcount_t		refs;
 	struct mutex		lock;
@@ -388,6 +393,10 @@ struct io_ring_ctx {
 	unsigned		nr_user_bufs;
 	struct io_mapped_ubuf	*user_bufs;
 
+	/* bpf programs */
+	struct io_bpf_prog	*bpf_progs;
+	unsigned		nr_bpf_progs;
+
 	struct user_struct	*user;
 
 	const struct cred	*creds;
@@ -8694,6 +8703,67 @@ static void io_req_cache_free(struct list_head *list)
 	}
 }
 
+static int io_bpf_detach(struct io_ring_ctx *ctx)
+{
+	int i;
+
+	if (!ctx->nr_bpf_progs)
+		return -ENXIO;
+
+	for (i = 0; i < ctx->nr_bpf_progs; ++i) {
+		struct bpf_prog *prog = ctx->bpf_progs[i].prog;
+
+		if (prog)
+			bpf_prog_put(prog);
+	}
+	kfree(ctx->bpf_progs);
+	ctx->bpf_progs = NULL;
+	ctx->nr_bpf_progs = 0;
+	return 0;
+}
+
+static int io_bpf_attach(struct io_ring_ctx *ctx, void __user *arg,
+			 unsigned int nr_args)
+{
+	u32 __user *fds = arg;
+	int i, ret = 0;
+
+	if (!nr_args || nr_args > 100)
+		return -EINVAL;
+	if (ctx->nr_bpf_progs)
+		return -EBUSY;
+
+	ctx->bpf_progs = kcalloc(nr_args, sizeof(ctx->bpf_progs[0]),
+				 GFP_KERNEL);
+	if (!ctx->bpf_progs)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_args; ++i) {
+		struct bpf_prog *prog;
+		u32 fd;
+
+		if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (fd == -1)
+			continue;
+
+		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_IOURING);
+		if (IS_ERR(prog)) {
+			ret = PTR_ERR(prog);
+			break;
+		}
+
+		ctx->bpf_progs[i].prog = prog;
+	}
+
+	ctx->nr_bpf_progs = i;
+	if (ret)
+		io_bpf_detach(ctx);
+	return ret;
+}
+
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
 	struct io_submit_state *submit_state = &ctx->submit_state;
@@ -8708,6 +8778,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 
 	io_finish_async(ctx);
 	io_sqe_buffers_unregister(ctx);
+	io_bpf_detach(ctx);
 
 	if (ctx->sqo_task) {
 		put_task_struct(ctx->sqo_task);
@@ -10151,6 +10222,15 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	case IORING_REGISTER_RESTRICTIONS:
 		ret = io_register_restrictions(ctx, arg, nr_args);
 		break;
+	case IORING_ATTACH_BPF:
+		ret = io_bpf_attach(ctx, arg, nr_args);
+		break;
+	case IORING_DETACH_BPF:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_bpf_detach(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ac4e1738a9af..d95e04d6d316 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -280,6 +280,8 @@ enum {
 	IORING_UNREGISTER_PERSONALITY		= 10,
 	IORING_REGISTER_RESTRICTIONS		= 11,
 	IORING_REGISTER_ENABLE_RINGS		= 12,
+	IORING_ATTACH_BPF			= 13,
+	IORING_DETACH_BPF			= 14,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
-- 
2.24.0



* [PATCH 3/4] io_uring: add IORING_OP_BPF
  2021-02-17 12:38 [RFC 0/4] add BPF-driven requests Pavel Begunkov
  2021-02-17 12:38 ` [PATCH 1/4] bpf: add IOURING program type Pavel Begunkov
  2021-02-17 12:38 ` [PATCH 2/4] io_uring: implement bpf prog registration Pavel Begunkov
@ 2021-02-17 12:38 ` Pavel Begunkov
  2021-02-17 12:38 ` [PATCH 4/4] io_uring: enable BPF to submit SQEs Pavel Begunkov
  3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2021-02-17 12:38 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Wire up a new io_uring operation type, IORING_OP_BPF, which executes a
specified BPF program from the registered prog table. It doesn't allow
doing anything useful for now; no BPF helpers are allowed apart from
the basic ones.
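
For illustration only, such a request could be queued roughly as below
with liburing (<liburing.h> and <string.h> included, and a struct
io_uring ring already set up without IORING_SETUP_IOPOLL/SQPOLL).
sqe->off selects the program in the registered table; the other fields
checked in io_bpf_prep() must stay zero:

  struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

  memset(sqe, 0, sizeof(*sqe));
  sqe->opcode = IORING_OP_BPF;
  sqe->off = 0;                 /* index into the registered prog table */
  sqe->user_data = 0xcafe;
  io_uring_submit(&ring);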

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 83 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 84 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 524cf1eb1cec..716881ca0b48 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -637,6 +637,11 @@ struct io_unlink {
 	struct filename			*filename;
 };
 
+struct io_bpf {
+	struct file			*file;
+	struct bpf_prog			*prog;
+};
+
 struct io_completion {
 	struct file			*file;
 	struct list_head		list;
@@ -773,6 +778,7 @@ struct io_kiocb {
 		struct io_shutdown	shutdown;
 		struct io_rename	rename;
 		struct io_unlink	unlink;
+		struct io_bpf		bpf;
 		/* use only after cleaning per-op data, see io_clean_op() */
 		struct io_completion	compl;
 	};
@@ -839,6 +845,10 @@ struct io_op_def {
 	unsigned		work_flags;
 };
 
+
+struct io_bpf_ctx {
+};
+
 static const struct io_op_def io_op_defs[] = {
 	[IORING_OP_NOP] = {},
 	[IORING_OP_READV] = {
@@ -1029,6 +1039,9 @@ static const struct io_op_def io_op_defs[] = {
 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
 						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
 	},
+	[IORING_OP_BPF]	= {
+		.work_flags		= IO_WQ_WORK_MM,
+	},
 };
 
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
@@ -1068,6 +1081,7 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
 static void io_req_task_queue(struct io_kiocb *req);
 static void io_submit_flush_completions(struct io_comp_state *cs,
 					struct io_ring_ctx *ctx);
+static void io_bpf_run(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
 
@@ -4208,6 +4222,53 @@ static int io_openat(struct io_kiocb *req, unsigned int issue_flags)
 	return io_openat2(req, issue_flags & IO_URING_F_NONBLOCK);
 }
 
+static int io_bpf_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct bpf_prog *prog;
+	unsigned int idx;
+
+	if (unlikely(ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+		return -EINVAL;
+	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->len || sqe->cancel_flags)
+		return -EINVAL;
+	if (sqe->addr)
+		return -EINVAL;
+
+	idx = READ_ONCE(sqe->off);
+	if (unlikely(idx >= ctx->nr_bpf_progs))
+		return -EFAULT;
+	idx = array_index_nospec(idx, ctx->nr_bpf_progs);
+	prog = ctx->bpf_progs[idx].prog;
+	if (!prog)
+		return -EFAULT;
+
+	req->bpf.prog = prog;
+	return 0;
+}
+
+static void io_bpf_run_task_work(struct callback_head *cb)
+{
+	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+	struct io_ring_ctx *ctx = req->ctx;
+
+	mutex_lock(&ctx->uring_lock);
+	io_bpf_run(req);
+	mutex_unlock(&ctx->uring_lock);
+}
+
+static int io_bpf(struct io_kiocb *req, unsigned int issue_flags)
+{
+	init_task_work(&req->task_work, io_bpf_run_task_work);
+	if (unlikely(io_req_task_work_add(req))) {
+		percpu_ref_get(&req->ctx->refs);
+		io_req_task_work_add_fallback(req, io_req_task_cancel);
+	}
+	return 0;
+}
+
 static int io_remove_buffers_prep(struct io_kiocb *req,
 				  const struct io_uring_sqe *sqe)
 {
@@ -6142,6 +6203,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_renameat_prep(req, sqe);
 	case IORING_OP_UNLINKAT:
 		return io_unlinkat_prep(req, sqe);
+	case IORING_OP_BPF:
+		return io_bpf_prep(req, sqe);
 	}
 
 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6380,6 +6443,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_UNLINKAT:
 		ret = io_unlinkat(req, issue_flags);
 		break;
+	case IORING_OP_BPF:
+		ret = io_bpf(req, issue_flags);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -10267,6 +10333,23 @@ const struct bpf_verifier_ops bpf_io_uring_verifier_ops = {
 	.is_valid_access	= io_bpf_is_valid_access,
 };
 
+static void io_bpf_run(struct io_kiocb *req)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_bpf_ctx bpf_ctx;
+
+	lockdep_assert_held(&req->ctx->uring_lock);
+
+	if (unlikely(percpu_ref_is_dying(&ctx->refs))) {
+		io_req_complete(req, -EAGAIN);
+		return;
+	}
+
+	memset(&bpf_ctx, 0, sizeof(bpf_ctx));
+	BPF_PROG_RUN(req->bpf.prog, &bpf_ctx);
+	io_req_complete(req, 0);
+}
+
 SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 		void __user *, arg, unsigned int, nr_args)
 {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d95e04d6d316..b75dfbf4f2cb 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -137,6 +137,7 @@ enum {
 	IORING_OP_SHUTDOWN,
 	IORING_OP_RENAMEAT,
 	IORING_OP_UNLINKAT,
+	IORING_OP_BPF,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.24.0



* [PATCH 4/4] io_uring: enable BPF to submit SQEs
  2021-02-17 12:38 [RFC 0/4] add BPF-driven requests Pavel Begunkov
                   ` (2 preceding siblings ...)
  2021-02-17 12:38 ` [PATCH 3/4] io_uring: add IORING_OP_BPF Pavel Begunkov
@ 2021-02-17 12:38 ` Pavel Begunkov
  3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2021-02-17 12:38 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Add a BPF_FUNC_iouring_queue_sqe BPF helper as a demonstration of
submitting a new request from a BPF request.
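
Not part of the patch, but as an illustration a BPF_PROG_TYPE_IOURING
program could queue a NOP request roughly as below. The helper is
declared by hand from its BPF_FUNC_* id since libbpf's helper headers
don't know about it yet, and the program can be loaded as sketched in
patch 1; the function name is made up:

  #include <linux/bpf.h>
  #include <linux/io_uring.h>

  static long (*bpf_iouring_queue_sqe)(void *ctx, const void *sqe,
                                       __u32 len) =
          (void *)BPF_FUNC_iouring_queue_sqe;

  int queue_nop(void *ctx)
  {
          struct io_uring_sqe sqe = {};

          sqe.opcode = IORING_OP_NOP;
          sqe.user_data = 1;
          bpf_iouring_queue_sqe(ctx, &sqe, sizeof(sqe));
          return 0;     /* the verifier only accepts a constant 0 */
  }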

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c            | 79 ++++++++++++++++++++++++++++++++++++----
 include/uapi/linux/bpf.h |  1 +
 2 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 716881ca0b48..2c63a3e68938 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -845,8 +845,14 @@ struct io_op_def {
 	unsigned		work_flags;
 };
 
+struct io_submit_link {
+	struct io_kiocb *head;
+	struct io_kiocb *last;
+};
 
 struct io_bpf_ctx {
+	struct io_ring_ctx 		*ctx;
+	struct io_submit_link		link;
 };
 
 static const struct io_op_def io_op_defs[] = {
@@ -6716,11 +6722,6 @@ static inline void io_queue_link_head(struct io_kiocb *req)
 		io_queue_sqe(req, NULL);
 }
 
-struct io_submit_link {
-	struct io_kiocb *head;
-	struct io_kiocb *last;
-};
-
 static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			 struct io_submit_link *link)
 {
@@ -6951,7 +6952,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			ret = -EBADF;
 	}
 
-	state->ios_left--;
+	if (state->ios_left > 1)
+		state->ios_left--;
 	return ret;
 }
 
@@ -10312,10 +10314,63 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	return ret;
 }
 
+static int io_ebpf_prep_req(struct io_bpf_ctx *bpf_ctx,
+			    const struct io_uring_sqe *sqe)
+{
+	struct io_ring_ctx *ctx = bpf_ctx->ctx;
+	struct io_kiocb *req = io_alloc_req(ctx);
+	int ret;
+
+	if (unlikely(!req))
+		return -ENOMEM;
+	if (!percpu_ref_tryget_many(&ctx->refs, 1)) {
+		kmem_cache_free(req_cachep, req);
+		return -EAGAIN;
+	}
+	percpu_counter_add(&current->io_uring->inflight, 1);
+	refcount_add(1, &current->usage);
+
+	ret = io_init_req(ctx, req, sqe);
+	if (unlikely(ret))
+		goto fail_req;
+
+	ret = io_submit_sqe(req, sqe, &bpf_ctx->link);
+	if (!ret)
+		return 0;
+fail_req:
+	io_double_put_req(req);
+	return ret;
+}
+
+BPF_CALL_3(bpf_io_uring_queue_sqe, void *, ctx, const void *, psqe, u32, len)
+{
+	const struct io_uring_sqe *sqe = psqe;
+	struct io_bpf_ctx *bpf_ctx = ctx;
+
+	if (len != sizeof(struct io_uring_sqe))
+		return -EINVAL;
+
+	return io_ebpf_prep_req(bpf_ctx, sqe);
+}
+
+const struct bpf_func_proto bpf_io_uring_queue_sqe_proto = {
+	.func = bpf_io_uring_queue_sqe,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_PTR_TO_CTX,
+	.arg2_type = ARG_PTR_TO_MEM,
+	.arg3_type = ARG_CONST_SIZE,
+};
+
 static const struct bpf_func_proto *
 io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
-	return bpf_base_func_proto(func_id);
+	switch (func_id) {
+	case BPF_FUNC_iouring_queue_sqe:
+		return &bpf_io_uring_queue_sqe_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
 }
 
 static bool io_bpf_is_valid_access(int off, int size,
@@ -10345,8 +10400,16 @@ static void io_bpf_run(struct io_kiocb *req)
 		return;
 	}
 
-	memset(&bpf_ctx, 0, sizeof(bpf_ctx));
+	io_submit_state_start(&ctx->submit_state, 1);
+	bpf_ctx.ctx = ctx;
+	bpf_ctx.link.head = NULL;
+
 	BPF_PROG_RUN(req->bpf.prog, &bpf_ctx);
+
+	if (bpf_ctx.link.head)
+		io_queue_link_head(bpf_ctx.link.head);
+	io_submit_state_end(&ctx->submit_state, ctx);
+
 	io_req_complete(req, 0);
 }
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2f1c0ab097d8..8c7c8f4ad044 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3996,6 +3996,7 @@ union bpf_attr {
 	FN(ktime_get_coarse_ns),	\
 	FN(ima_inode_hash),		\
 	FN(sock_from_file),		\
+	FN(iouring_queue_sqe),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.24.0


