* [PATCH RFC 5.13] io_uring: add IORING_REGISTER_PRIORITY
@ 2021-05-06 14:33 Hao Xu
From: Hao Xu @ 2021-05-06 14:33 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, Pavel Begunkov, Joseph Qi

Users may want a higher priority for the sq_thread or io-workers. Provide a
way to change the nice value (for SCHED_NORMAL) or the scheduling policy and
priority.

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
---

Currently only the sqthread target is supported; space is reserved in the API
for io-workers. A short userspace usage sketch follows the diffstat below.

 fs/io_uring.c                 | 117 ++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  29 +++++++++++
 2 files changed, 146 insertions(+)
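For reviewers, here is a rough sketch of how this could be driven from
userspace with raw syscalls (liburing has no wrapper for this RFC opcode).
It assumes the updated <linux/io_uring.h> from this patch is installed and
that SQPOLL setup succeeds; the ring_register_priority() helper is purely
illustrative:

/*
 * Illustrative only: renice the SQPOLL thread, then switch it to SCHED_FIFO.
 * Both operations may need CAP_SYS_NICE (or a suitable RLIMIT_NICE for the
 * renice), matching the checks in set_one_prio()/sched_setscheduler().
 */
#include <linux/io_uring.h>
#include <sched.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static int ring_register_priority(int ring_fd, struct io_uring_priority *prio)
{
	/* nr_args must be 1, as enforced in __io_uring_register() */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_PRIORITY, prio, 1);
}

int main(void)
{
	/* TARGET_SQ only works if the ring actually has an SQPOLL thread */
	struct io_uring_params p = { .flags = IORING_SETUP_SQPOLL };
	struct io_uring_priority prio = { 0 };
	int ring_fd = syscall(__NR_io_uring_setup, 8, &p);

	if (ring_fd < 0) {
		perror("io_uring_setup");
		return 1;
	}

	/* Mode 1: renice the SQPOLL thread to -10 */
	prio.target = IORING_PRIORITY_TARGET_SQ;
	prio.mode = IORING_PRIORITY_MODE_NICE;
	prio.nice = -10;
	if (ring_register_priority(ring_fd, &prio))
		perror("IORING_REGISTER_PRIORITY (nice)");

	/* Mode 2: make it SCHED_FIFO with realtime priority 10 */
	prio.mode = IORING_PRIORITY_MODE_SCHEDULER;
	prio.param.policy = SCHED_FIFO;
	prio.param.sched_priority = 10;
	if (ring_register_priority(ring_fd, &prio))
		perror("IORING_REGISTER_PRIORITY (sched)");

	return 0;
}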

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 63ff70587d4f..a77b7a05c058 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -83,6 +83,7 @@
 #include <trace/events/io_uring.h>
 
 #include <uapi/linux/io_uring.h>
+#include <uapi/linux/sched/types.h>
 
 #include "internal.h"
 #include "io-wq.h"
@@ -9908,6 +9909,115 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 	return -EINVAL;
 }
 
+/*
+ * Returns true if current's euid matches p's uid or euid,
+ * or if the caller has CAP_SYS_NICE in p's user_ns.
+ *
+ * Called with the RCU read lock held, so the creds are safe.
+ */
+static bool set_one_prio_perm(struct task_struct *p)
+{
+	const struct cred *cred = current_cred(), *pcred = __task_cred(p);
+
+	if (uid_eq(pcred->uid,  cred->euid) ||
+	    uid_eq(pcred->euid, cred->euid))
+		return true;
+	if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
+		return true;
+	return false;
+}
+
+/*
+ * set the priority of a task
+ * - the caller must hold the RCU read lock
+ */
+static int set_one_prio(struct task_struct *p, int niceval, int error)
+{
+	int no_nice;
+
+	if (!set_one_prio_perm(p)) {
+		error = -EPERM;
+		goto out;
+	}
+	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
+		error = -EACCES;
+		goto out;
+	}
+	no_nice = security_task_setnice(p, niceval);
+	if (no_nice) {
+		error = no_nice;
+		goto out;
+	}
+	if (error == -ESRCH)
+		error = 0;
+	set_user_nice(p, niceval);
+out:
+	return error;
+}
+
+static int io_register_priority(struct io_ring_ctx *ctx, void __user *arg)
+{
+	struct io_uring_priority prio;
+	int error = -ESRCH;
+
+	if (copy_from_user(&prio, arg, sizeof(struct io_uring_priority)))
+		return -EFAULT;
+
+	if (prio.target >= IORING_PRIORITY_TARGET_LAST)
+		return -EINVAL;
+
+	if (prio.mode >= IORING_PRIORITY_MODE_LAST)
+		return -EINVAL;
+
+
+	switch (prio.mode) {
+	case IORING_PRIORITY_MODE_NICE: {
+		int niceval = prio.nice;
+
+		if (niceval < MIN_NICE)
+			niceval = MIN_NICE;
+		if (niceval > MAX_NICE)
+			niceval = MAX_NICE;
+
+		if (prio.target == IORING_PRIORITY_TARGET_SQ) {
+			struct io_sq_data *sqd = ctx->sq_data;
+
+			if (!sqd)
+				return error;
+
+			io_sq_thread_park(sqd);
+			if (sqd->thread)
+				error = set_one_prio(sqd->thread, niceval, -ESRCH);
+			io_sq_thread_unpark(sqd);
+		}
+		break;
+	}
+	case IORING_PRIORITY_MODE_SCHEDULER: {
+		int policy = prio.param.policy;
+		int sched_priority = prio.param.sched_priority;
+		struct sched_param lparam = { .sched_priority = sched_priority };
+
+		if (policy < 0)
+			return -EINVAL;
+
+		if (prio.target == IORING_PRIORITY_TARGET_SQ) {
+			struct io_sq_data *sqd = ctx->sq_data;
+
+			if (!sqd)
+				return error;
+
+			io_sq_thread_park(sqd);
+			if (sqd->thread)
+				error = sched_setscheduler(sqd->thread, policy, &lparam);
+			io_sq_thread_unpark(sqd);
+		}
+		break;
+	}
+	}
+
+	return error;
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9921,6 +10031,7 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_UNREGISTER_PERSONALITY:
 	case IORING_REGISTER_RSRC:
 	case IORING_REGISTER_RSRC_UPDATE:
+	case IORING_REGISTER_PRIORITY:
 		return false;
 	default:
 		return true;
@@ -10052,6 +10163,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	case IORING_REGISTER_RSRC_UPDATE:
 		ret = io_register_rsrc_update(ctx, arg, nr_args);
 		break;
+	case IORING_REGISTER_PRIORITY:
+		ret = -EINVAL;
+		if (!arg || nr_args != 1)
+			break;
+		ret = io_register_priority(ctx, arg);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index e1ae46683301..4788a62bf3a1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -300,6 +300,7 @@ enum {
 	IORING_REGISTER_ENABLE_RINGS		= 12,
 	IORING_REGISTER_RSRC			= 13,
 	IORING_REGISTER_RSRC_UPDATE		= 14,
+	IORING_REGISTER_PRIORITY		= 15,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
@@ -396,4 +397,32 @@ struct io_uring_getevents_arg {
 	__u64	ts;
 };
 
+enum {
+	IORING_PRIORITY_TARGET_SQ,	/* update priority of sqthread */
+	IORING_PRIORITY_TARGET_WQ,	/* update priority of io worker */
+
+	IORING_PRIORITY_TARGET_LAST,
+};
+
+enum {
+	IORING_PRIORITY_MODE_NICE,
+	IORING_PRIORITY_MODE_SCHEDULER,
+
+	IORING_PRIORITY_MODE_LAST,
+};
+
+struct io_uring_sched_param {
+	__s32 policy;
+	__s32 sched_priority;
+};
+
+struct io_uring_priority {
+	__u32 target;
+	__u32 mode;
+	union {
+		__s32 nice;
+		struct io_uring_sched_param param;
+	};
+};
+
 #endif
-- 
1.8.3.1

