io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] io_uring: add timeout support for io_uring_enter()
@ 2020-08-04  9:28 Jiufei Xue
  2020-08-13  2:08 ` Jiufei Xue
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Jiufei Xue @ 2020-08-04  9:28 UTC (permalink / raw)
  To: axboe; +Cc: io-uring, metze, Jiufei Xue

Now users who want to get woken when waiting for events should submit a
timeout command first. It is not safe for applications that split SQ and
CQ handling between two threads, such as mysql. Users should synchronize
the two threads explicitly to protect SQ and that will impact the
performance.

This patch adds support for timeout to existing io_uring_enter(). To
avoid overloading arguments, it introduces a new parameter structure
which contains sigmask and timeout.

I have tested the workloads with one thread submitting nop requests
while the other reaping the cqe with timeout. It shows 1.8~2x faster
when the iodepth is 16.

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
---
 fs/io_uring.c                 | 45 +++++++++++++++++++++++++++++++++++++------
 include/uapi/linux/io_uring.h |  7 +++++++
 2 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2a3af95..cdd89e4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6514,7 +6514,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
  * application must reap them itself, as they reside on the shared cq ring.
  */
 static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
-			  const sigset_t __user *sig, size_t sigsz)
+			  const sigset_t __user *sig, size_t sigsz,
+			  struct __kernel_timespec __user *uts)
 {
 	struct io_wait_queue iowq = {
 		.wq = {
@@ -6526,6 +6527,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 		.to_wait	= min_events,
 	};
 	struct io_rings *rings = ctx->rings;
+	struct timespec64 ts;
+	signed long timeout = 0;
 	int ret = 0;
 
 	do {
@@ -6548,6 +6551,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
+	if (uts) {
+		if (get_timespec64(&ts, uts))
+			return -EFAULT;
+		timeout = timespec64_to_jiffies(&ts);
+	}
+
 	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
 	trace_io_uring_cqring_wait(ctx, min_events);
 	do {
@@ -6569,7 +6578,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 		}
 		if (io_should_wake(&iowq, false))
 			break;
-		schedule();
+		if (uts) {
+			if ((timeout = schedule_timeout(timeout)) == 0) {
+				ret = -ETIME;
+				break;
+			}
+		} else {
+			schedule();
+		}
 	} while (1);
 	finish_wait(&ctx->wait, &iowq.wq);
 
@@ -7993,19 +8009,36 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
 #endif /* !CONFIG_MMU */
 
 SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
-		u32, min_complete, u32, flags, const sigset_t __user *, sig,
+		u32, min_complete, u32, flags, const void __user *, argp,
 		size_t, sigsz)
 {
 	struct io_ring_ctx *ctx;
 	long ret = -EBADF;
 	int submitted = 0;
 	struct fd f;
+	const sigset_t __user *sig;
+	struct __kernel_timespec __user *ts;
+	struct io_uring_getevents_arg arg;
 
 	io_run_task_work();
 
-	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
+	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
+		      IORING_ENTER_GETEVENTS_TIMEOUT))
 		return -EINVAL;
 
+	/* deal with IORING_ENTER_GETEVENTS_TIMEOUT */
+	if (flags & IORING_ENTER_GETEVENTS_TIMEOUT) {
+		if (!(flags & IORING_ENTER_GETEVENTS))
+			return -EINVAL;
+		if (copy_from_user(&arg, argp, sizeof(arg)))
+			return -EFAULT;
+		sig = arg.sigmask;
+		ts = arg.ts;
+	} else {
+		sig = (const sigset_t __user *)argp;
+		ts = NULL;
+	}
+
 	f = fdget(fd);
 	if (!f.file)
 		return -EBADF;
@@ -8052,7 +8085,7 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
 		    !(ctx->flags & IORING_SETUP_SQPOLL)) {
 			ret = io_iopoll_check(ctx, min_complete);
 		} else {
-			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
+			ret = io_cqring_wait(ctx, min_complete, sig, sigsz, ts);
 		}
 	}
 
@@ -8346,7 +8379,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
 			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
-			IORING_FEAT_POLL_32BITS;
+			IORING_FEAT_POLL_32BITS | IORING_FEAT_GETEVENTS_TIMEOUT;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d65fde7..70764d2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -224,6 +224,7 @@ struct io_cqring_offsets {
  */
 #define IORING_ENTER_GETEVENTS	(1U << 0)
 #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
+#define IORING_ENTER_GETEVENTS_TIMEOUT	(1U << 2)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -251,6 +252,7 @@ struct io_uring_params {
 #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
 #define IORING_FEAT_FAST_POLL		(1U << 5)
 #define IORING_FEAT_POLL_32BITS 	(1U << 6)
+#define IORING_FEAT_GETEVENTS_TIMEOUT	(1U << 7)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -290,4 +292,9 @@ struct io_uring_probe {
 	struct io_uring_probe_op ops[0];
 };
 
+struct io_uring_getevents_arg {
+	sigset_t *sigmask;
+	struct __kernel_timespec *ts;
+};
+
 #endif
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] io_uring: add timeout support for io_uring_enter()
  2020-08-04  9:28 [PATCH v2] io_uring: add timeout support for io_uring_enter() Jiufei Xue
@ 2020-08-13  2:08 ` Jiufei Xue
  2020-08-24  1:49 ` Jiufei Xue
  2020-10-31  2:54 ` Jens Axboe
  2 siblings, 0 replies; 6+ messages in thread
From: Jiufei Xue @ 2020-08-13  2:08 UTC (permalink / raw)
  To: axboe; +Cc: io-uring, metze

Hi Jens,

Could you please review this patch?

Thanks,
Jiufei

On 2020/8/4 下午5:28, Jiufei Xue wrote:
> Now users who want to get woken when waiting for events should submit a
> timeout command first. It is not safe for applications that split SQ and
> CQ handling between two threads, such as mysql. Users should synchronize
> the two threads explicitly to protect SQ and that will impact the
> performance.
> 
> This patch adds support for timeout to existing io_uring_enter(). To
> avoid overloading arguments, it introduces a new parameter structure
> which contains sigmask and timeout.
> 
> I have tested the workloads with one thread submitting nop requests
> while the other reaping the cqe with timeout. It shows 1.8~2x faster
> when the iodepth is 16.
> 
> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
> ---
>  fs/io_uring.c                 | 45 +++++++++++++++++++++++++++++++++++++------
>  include/uapi/linux/io_uring.h |  7 +++++++
>  2 files changed, 46 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 2a3af95..cdd89e4 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -6514,7 +6514,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
>   * application must reap them itself, as they reside on the shared cq ring.
>   */
>  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
> -			  const sigset_t __user *sig, size_t sigsz)
> +			  const sigset_t __user *sig, size_t sigsz,
> +			  struct __kernel_timespec __user *uts)
>  {
>  	struct io_wait_queue iowq = {
>  		.wq = {
> @@ -6526,6 +6527,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  		.to_wait	= min_events,
>  	};
>  	struct io_rings *rings = ctx->rings;
> +	struct timespec64 ts;
> +	signed long timeout = 0;
>  	int ret = 0;
>  
>  	do {
> @@ -6548,6 +6551,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  			return ret;
>  	}
>  
> +	if (uts) {
> +		if (get_timespec64(&ts, uts))
> +			return -EFAULT;
> +		timeout = timespec64_to_jiffies(&ts);
> +	}
> +
>  	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
>  	trace_io_uring_cqring_wait(ctx, min_events);
>  	do {
> @@ -6569,7 +6578,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  		}
>  		if (io_should_wake(&iowq, false))
>  			break;
> -		schedule();
> +		if (uts) {
> +			if ((timeout = schedule_timeout(timeout)) == 0) {
> +				ret = -ETIME;
> +				break;
> +			}
> +		} else {
> +			schedule();
> +		}
>  	} while (1);
>  	finish_wait(&ctx->wait, &iowq.wq);
>  
> @@ -7993,19 +8009,36 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>  #endif /* !CONFIG_MMU */
>  
>  SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
> -		u32, min_complete, u32, flags, const sigset_t __user *, sig,
> +		u32, min_complete, u32, flags, const void __user *, argp,
>  		size_t, sigsz)
>  {
>  	struct io_ring_ctx *ctx;
>  	long ret = -EBADF;
>  	int submitted = 0;
>  	struct fd f;
> +	const sigset_t __user *sig;
> +	struct __kernel_timespec __user *ts;
> +	struct io_uring_getevents_arg arg;
>  
>  	io_run_task_work();
>  
> -	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
> +	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
> +		      IORING_ENTER_GETEVENTS_TIMEOUT))
>  		return -EINVAL;
>  
> +	/* deal with IORING_ENTER_GETEVENTS_TIMEOUT */
> +	if (flags & IORING_ENTER_GETEVENTS_TIMEOUT) {
> +		if (!(flags & IORING_ENTER_GETEVENTS))
> +			return -EINVAL;
> +		if (copy_from_user(&arg, argp, sizeof(arg)))
> +			return -EFAULT;
> +		sig = arg.sigmask;
> +		ts = arg.ts;
> +	} else {
> +		sig = (const sigset_t __user *)argp;
> +		ts = NULL;
> +	}
> +
>  	f = fdget(fd);
>  	if (!f.file)
>  		return -EBADF;
> @@ -8052,7 +8085,7 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>  		    !(ctx->flags & IORING_SETUP_SQPOLL)) {
>  			ret = io_iopoll_check(ctx, min_complete);
>  		} else {
> -			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
> +			ret = io_cqring_wait(ctx, min_complete, sig, sigsz, ts);
>  		}
>  	}
>  
> @@ -8346,7 +8379,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
>  	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
>  			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
>  			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
> -			IORING_FEAT_POLL_32BITS;
> +			IORING_FEAT_POLL_32BITS | IORING_FEAT_GETEVENTS_TIMEOUT;
>  
>  	if (copy_to_user(params, p, sizeof(*p))) {
>  		ret = -EFAULT;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index d65fde7..70764d2 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -224,6 +224,7 @@ struct io_cqring_offsets {
>   */
>  #define IORING_ENTER_GETEVENTS	(1U << 0)
>  #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
> +#define IORING_ENTER_GETEVENTS_TIMEOUT	(1U << 2)
>  
>  /*
>   * Passed in for io_uring_setup(2). Copied back with updated info on success
> @@ -251,6 +252,7 @@ struct io_uring_params {
>  #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
>  #define IORING_FEAT_FAST_POLL		(1U << 5)
>  #define IORING_FEAT_POLL_32BITS 	(1U << 6)
> +#define IORING_FEAT_GETEVENTS_TIMEOUT	(1U << 7)
>  
>  /*
>   * io_uring_register(2) opcodes and arguments
> @@ -290,4 +292,9 @@ struct io_uring_probe {
>  	struct io_uring_probe_op ops[0];
>  };
>  
> +struct io_uring_getevents_arg {
> +	sigset_t *sigmask;
> +	struct __kernel_timespec *ts;
> +};
> +
>  #endif
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] io_uring: add timeout support for io_uring_enter()
  2020-08-04  9:28 [PATCH v2] io_uring: add timeout support for io_uring_enter() Jiufei Xue
  2020-08-13  2:08 ` Jiufei Xue
@ 2020-08-24  1:49 ` Jiufei Xue
  2020-10-30 12:50   ` Pavel Begunkov
  2020-10-31  2:54 ` Jens Axboe
  2 siblings, 1 reply; 6+ messages in thread
From: Jiufei Xue @ 2020-08-24  1:49 UTC (permalink / raw)
  To: axboe; +Cc: io-uring, metze

ping...

On 2020/8/4 下午5:28, Jiufei Xue wrote:
> Now users who want to get woken when waiting for events should submit a
> timeout command first. It is not safe for applications that split SQ and
> CQ handling between two threads, such as mysql. Users should synchronize
> the two threads explicitly to protect SQ and that will impact the
> performance.
> 
> This patch adds support for timeout to existing io_uring_enter(). To
> avoid overloading arguments, it introduces a new parameter structure
> which contains sigmask and timeout.
> 
> I have tested the workloads with one thread submitting nop requests
> while the other reaping the cqe with timeout. It shows 1.8~2x faster
> when the iodepth is 16.
> 
> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
> ---
>  fs/io_uring.c                 | 45 +++++++++++++++++++++++++++++++++++++------
>  include/uapi/linux/io_uring.h |  7 +++++++
>  2 files changed, 46 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 2a3af95..cdd89e4 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -6514,7 +6514,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
>   * application must reap them itself, as they reside on the shared cq ring.
>   */
>  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
> -			  const sigset_t __user *sig, size_t sigsz)
> +			  const sigset_t __user *sig, size_t sigsz,
> +			  struct __kernel_timespec __user *uts)
>  {
>  	struct io_wait_queue iowq = {
>  		.wq = {
> @@ -6526,6 +6527,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  		.to_wait	= min_events,
>  	};
>  	struct io_rings *rings = ctx->rings;
> +	struct timespec64 ts;
> +	signed long timeout = 0;
>  	int ret = 0;
>  
>  	do {
> @@ -6548,6 +6551,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  			return ret;
>  	}
>  
> +	if (uts) {
> +		if (get_timespec64(&ts, uts))
> +			return -EFAULT;
> +		timeout = timespec64_to_jiffies(&ts);
> +	}
> +
>  	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
>  	trace_io_uring_cqring_wait(ctx, min_events);
>  	do {
> @@ -6569,7 +6578,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  		}
>  		if (io_should_wake(&iowq, false))
>  			break;
> -		schedule();
> +		if (uts) {
> +			if ((timeout = schedule_timeout(timeout)) == 0) {
> +				ret = -ETIME;
> +				break;
> +			}
> +		} else {
> +			schedule();
> +		}
>  	} while (1);
>  	finish_wait(&ctx->wait, &iowq.wq);
>  
> @@ -7993,19 +8009,36 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>  #endif /* !CONFIG_MMU */
>  
>  SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
> -		u32, min_complete, u32, flags, const sigset_t __user *, sig,
> +		u32, min_complete, u32, flags, const void __user *, argp,
>  		size_t, sigsz)
>  {
>  	struct io_ring_ctx *ctx;
>  	long ret = -EBADF;
>  	int submitted = 0;
>  	struct fd f;
> +	const sigset_t __user *sig;
> +	struct __kernel_timespec __user *ts;
> +	struct io_uring_getevents_arg arg;
>  
>  	io_run_task_work();
>  
> -	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
> +	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
> +		      IORING_ENTER_GETEVENTS_TIMEOUT))
>  		return -EINVAL;
>  
> +	/* deal with IORING_ENTER_GETEVENTS_TIMEOUT */
> +	if (flags & IORING_ENTER_GETEVENTS_TIMEOUT) {
> +		if (!(flags & IORING_ENTER_GETEVENTS))
> +			return -EINVAL;
> +		if (copy_from_user(&arg, argp, sizeof(arg)))
> +			return -EFAULT;
> +		sig = arg.sigmask;
> +		ts = arg.ts;
> +	} else {
> +		sig = (const sigset_t __user *)argp;
> +		ts = NULL;
> +	}
> +
>  	f = fdget(fd);
>  	if (!f.file)
>  		return -EBADF;
> @@ -8052,7 +8085,7 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>  		    !(ctx->flags & IORING_SETUP_SQPOLL)) {
>  			ret = io_iopoll_check(ctx, min_complete);
>  		} else {
> -			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
> +			ret = io_cqring_wait(ctx, min_complete, sig, sigsz, ts);
>  		}
>  	}
>  
> @@ -8346,7 +8379,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
>  	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
>  			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
>  			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
> -			IORING_FEAT_POLL_32BITS;
> +			IORING_FEAT_POLL_32BITS | IORING_FEAT_GETEVENTS_TIMEOUT;
>  
>  	if (copy_to_user(params, p, sizeof(*p))) {
>  		ret = -EFAULT;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index d65fde7..70764d2 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -224,6 +224,7 @@ struct io_cqring_offsets {
>   */
>  #define IORING_ENTER_GETEVENTS	(1U << 0)
>  #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
> +#define IORING_ENTER_GETEVENTS_TIMEOUT	(1U << 2)
>  
>  /*
>   * Passed in for io_uring_setup(2). Copied back with updated info on success
> @@ -251,6 +252,7 @@ struct io_uring_params {
>  #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
>  #define IORING_FEAT_FAST_POLL		(1U << 5)
>  #define IORING_FEAT_POLL_32BITS 	(1U << 6)
> +#define IORING_FEAT_GETEVENTS_TIMEOUT	(1U << 7)
>  
>  /*
>   * io_uring_register(2) opcodes and arguments
> @@ -290,4 +292,9 @@ struct io_uring_probe {
>  	struct io_uring_probe_op ops[0];
>  };
>  
> +struct io_uring_getevents_arg {
> +	sigset_t *sigmask;
> +	struct __kernel_timespec *ts;
> +};
> +
>  #endif
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] io_uring: add timeout support for io_uring_enter()
  2020-08-24  1:49 ` Jiufei Xue
@ 2020-10-30 12:50   ` Pavel Begunkov
  2020-10-30 21:33     ` Jens Axboe
  0 siblings, 1 reply; 6+ messages in thread
From: Pavel Begunkov @ 2020-10-30 12:50 UTC (permalink / raw)
  To: Jiufei Xue, axboe; +Cc: io-uring, metze

On 24/08/2020 02:49, Jiufei Xue wrote:
> ping...
> 
> On 2020/8/4 下午5:28, Jiufei Xue wrote:
>> Now users who want to get woken when waiting for events should submit a
>> timeout command first. It is not safe for applications that split SQ and
>> CQ handling between two threads, such as mysql. Users should synchronize
>> the two threads explicitly to protect SQ and that will impact the
>> performance.
>>
>> This patch adds support for timeout to existing io_uring_enter(). To
>> avoid overloading arguments, it introduces a new parameter structure
>> which contains sigmask and timeout.
>>
>> I have tested the workloads with one thread submitting nop requests
>> while the other reaping the cqe with timeout. It shows 1.8~2x faster
>> when the iodepth is 16.

What happened with this? I thought there were enough people wanting
such a thing.

>>
>> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
>> ---
>>  fs/io_uring.c                 | 45 +++++++++++++++++++++++++++++++++++++------
>>  include/uapi/linux/io_uring.h |  7 +++++++
>>  2 files changed, 46 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 2a3af95..cdd89e4 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -6514,7 +6514,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
>>   * application must reap them itself, as they reside on the shared cq ring.
>>   */
>>  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>> -			  const sigset_t __user *sig, size_t sigsz)
>> +			  const sigset_t __user *sig, size_t sigsz,
>> +			  struct __kernel_timespec __user *uts)
>>  {
>>  	struct io_wait_queue iowq = {
>>  		.wq = {
>> @@ -6526,6 +6527,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>>  		.to_wait	= min_events,
>>  	};
>>  	struct io_rings *rings = ctx->rings;
>> +	struct timespec64 ts;
>> +	signed long timeout = 0;
>>  	int ret = 0;
>>  
>>  	do {
>> @@ -6548,6 +6551,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>>  			return ret;
>>  	}
>>  
>> +	if (uts) {
>> +		if (get_timespec64(&ts, uts))
>> +			return -EFAULT;
>> +		timeout = timespec64_to_jiffies(&ts);
>> +	}
>> +
>>  	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
>>  	trace_io_uring_cqring_wait(ctx, min_events);
>>  	do {
>> @@ -6569,7 +6578,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>>  		}
>>  		if (io_should_wake(&iowq, false))
>>  			break;
>> -		schedule();
>> +		if (uts) {
>> +			if ((timeout = schedule_timeout(timeout)) == 0) {
>> +				ret = -ETIME;
>> +				break;
>> +			}
>> +		} else {
>> +			schedule();
>> +		}
>>  	} while (1);
>>  	finish_wait(&ctx->wait, &iowq.wq);
>>  
>> @@ -7993,19 +8009,36 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>>  #endif /* !CONFIG_MMU */
>>  
>>  SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
>> -		u32, min_complete, u32, flags, const sigset_t __user *, sig,
>> +		u32, min_complete, u32, flags, const void __user *, argp,
>>  		size_t, sigsz)
>>  {
>>  	struct io_ring_ctx *ctx;
>>  	long ret = -EBADF;
>>  	int submitted = 0;
>>  	struct fd f;
>> +	const sigset_t __user *sig;
>> +	struct __kernel_timespec __user *ts;
>> +	struct io_uring_getevents_arg arg;
>>  
>>  	io_run_task_work();
>>  
>> -	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
>> +	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
>> +		      IORING_ENTER_GETEVENTS_TIMEOUT))
>>  		return -EINVAL;
>>  
>> +	/* deal with IORING_ENTER_GETEVENTS_TIMEOUT */
>> +	if (flags & IORING_ENTER_GETEVENTS_TIMEOUT) {
>> +		if (!(flags & IORING_ENTER_GETEVENTS))
>> +			return -EINVAL;
>> +		if (copy_from_user(&arg, argp, sizeof(arg)))
>> +			return -EFAULT;
>> +		sig = arg.sigmask;
>> +		ts = arg.ts;
>> +	} else {
>> +		sig = (const sigset_t __user *)argp;
>> +		ts = NULL;
>> +	}
>> +
>>  	f = fdget(fd);
>>  	if (!f.file)
>>  		return -EBADF;
>> @@ -8052,7 +8085,7 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>>  		    !(ctx->flags & IORING_SETUP_SQPOLL)) {
>>  			ret = io_iopoll_check(ctx, min_complete);
>>  		} else {
>> -			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
>> +			ret = io_cqring_wait(ctx, min_complete, sig, sigsz, ts);
>>  		}
>>  	}
>>  
>> @@ -8346,7 +8379,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
>>  	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
>>  			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
>>  			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
>> -			IORING_FEAT_POLL_32BITS;
>> +			IORING_FEAT_POLL_32BITS | IORING_FEAT_GETEVENTS_TIMEOUT;
>>  
>>  	if (copy_to_user(params, p, sizeof(*p))) {
>>  		ret = -EFAULT;
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index d65fde7..70764d2 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -224,6 +224,7 @@ struct io_cqring_offsets {
>>   */
>>  #define IORING_ENTER_GETEVENTS	(1U << 0)
>>  #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
>> +#define IORING_ENTER_GETEVENTS_TIMEOUT	(1U << 2)
>>  
>>  /*
>>   * Passed in for io_uring_setup(2). Copied back with updated info on success
>> @@ -251,6 +252,7 @@ struct io_uring_params {
>>  #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
>>  #define IORING_FEAT_FAST_POLL		(1U << 5)
>>  #define IORING_FEAT_POLL_32BITS 	(1U << 6)
>> +#define IORING_FEAT_GETEVENTS_TIMEOUT	(1U << 7)
>>  
>>  /*
>>   * io_uring_register(2) opcodes and arguments
>> @@ -290,4 +292,9 @@ struct io_uring_probe {
>>  	struct io_uring_probe_op ops[0];
>>  };
>>  
>> +struct io_uring_getevents_arg {
>> +	sigset_t *sigmask;
>> +	struct __kernel_timespec *ts;
>> +};
>> +
>>  #endif
>>

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] io_uring: add timeout support for io_uring_enter()
  2020-10-30 12:50   ` Pavel Begunkov
@ 2020-10-30 21:33     ` Jens Axboe
  0 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-10-30 21:33 UTC (permalink / raw)
  To: Pavel Begunkov, Jiufei Xue; +Cc: io-uring, metze

On 10/30/20 6:50 AM, Pavel Begunkov wrote:
> On 24/08/2020 02:49, Jiufei Xue wrote:
>> ping...
>>
>> On 2020/8/4 下午5:28, Jiufei Xue wrote:
>>> Now users who want to get woken when waiting for events should submit a
>>> timeout command first. It is not safe for applications that split SQ and
>>> CQ handling between two threads, such as mysql. Users should synchronize
>>> the two threads explicitly to protect SQ and that will impact the
>>> performance.
>>>
>>> This patch adds support for timeout to existing io_uring_enter(). To
>>> avoid overloading arguments, it introduces a new parameter structure
>>> which contains sigmask and timeout.
>>>
>>> I have tested the workloads with one thread submitting nop requests
>>> while the other reaping the cqe with timeout. It shows 1.8~2x faster
>>> when the iodepth is 16.
> 
> What happened with this? I thought there were enough people wanting
> such a thing.

I think there are, feel free to run with it. The patch looks reasonable
to me. Jiufei, I'm assuming you guys are already using something like
this?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] io_uring: add timeout support for io_uring_enter()
  2020-08-04  9:28 [PATCH v2] io_uring: add timeout support for io_uring_enter() Jiufei Xue
  2020-08-13  2:08 ` Jiufei Xue
  2020-08-24  1:49 ` Jiufei Xue
@ 2020-10-31  2:54 ` Jens Axboe
  2 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-10-31  2:54 UTC (permalink / raw)
  To: Jiufei Xue; +Cc: io-uring, metze

On 8/4/20 3:28 AM, Jiufei Xue wrote:
> @@ -6569,7 +6578,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  		}
>  		if (io_should_wake(&iowq, false))
>  			break;
> -		schedule();
> +		if (uts) {
> +			if ((timeout = schedule_timeout(timeout)) == 0) {
> +				ret = -ETIME;
> +				break;
> +			}

Please don't combine lines, this is a lot more readable as:

timeout = schedule_timeout(timeout);
if (!timeout) {
	...
}

> @@ -7993,19 +8009,36 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
>  #endif /* !CONFIG_MMU */
>  
>  SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
> -		u32, min_complete, u32, flags, const sigset_t __user *, sig,
> +		u32, min_complete, u32, flags, const void __user *, argp,
>  		size_t, sigsz)
>  {
>  	struct io_ring_ctx *ctx;
>  	long ret = -EBADF;
>  	int submitted = 0;
>  	struct fd f;
> +	const sigset_t __user *sig;
> +	struct __kernel_timespec __user *ts;
> +	struct io_uring_getevents_arg arg;
>  
>  	io_run_task_work();
>  
> -	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
> +	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
> +		      IORING_ENTER_GETEVENTS_TIMEOUT))
>  		return -EINVAL;
>  
> +	/* deal with IORING_ENTER_GETEVENTS_TIMEOUT */
> +	if (flags & IORING_ENTER_GETEVENTS_TIMEOUT) {
> +		if (!(flags & IORING_ENTER_GETEVENTS))
> +			return -EINVAL;
> +		if (copy_from_user(&arg, argp, sizeof(arg)))
> +			return -EFAULT;
> +		sig = arg.sigmask;
> +		ts = arg.ts;
> +	} else {

I like the idea of the arg structure, but why don't we utilize the
size_t argument for that struct? That would allow flexibility on
potentially changing that structure in the future. You can use it for
versioning, basically. So I'd require that to be sizeof(arg), and the
above should then add:

	if (flags & IORING_ENTER_GETEVENTS_TIMEOUT) {
		if (!(flags & IORING_ENTER_GETEVENTS))
			return -EINVAL;
		if (sigsz != sizeof(arg))
			return -EINVAL;
		...

> @@ -290,4 +292,9 @@ struct io_uring_probe {
>  	struct io_uring_probe_op ops[0];
>  };
>  
> +struct io_uring_getevents_arg {
> +	sigset_t *sigmask;
> +	struct __kernel_timespec *ts;
> +};
> +

This structure isn't the same size on 32-bit and 64-bit, that should be
rectified. I'd make the sigmask a u64 type of some sort.

So little things - but please resend with the suggested changes and we
can move forward with this patch for 5.11.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-10-31  2:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-04  9:28 [PATCH v2] io_uring: add timeout support for io_uring_enter() Jiufei Xue
2020-08-13  2:08 ` Jiufei Xue
2020-08-24  1:49 ` Jiufei Xue
2020-10-30 12:50   ` Pavel Begunkov
2020-10-30 21:33     ` Jens Axboe
2020-10-31  2:54 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).