io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation
@ 2020-06-17  9:53 Jiufei Xue
  2020-06-17  9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
  2020-06-17  9:53 ` [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior Jiufei Xue
  0 siblings, 2 replies; 4+ messages in thread
From: Jiufei Xue @ 2020-06-17  9:53 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, joseph.qi

Applications can use this flag to avoid thundering-herd behavior on
accept().

Jiufei Xue (2):
  io_uring: change the poll type to be 32-bits
  io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v4 1/2] io_uring: change the poll type to be 32-bits
  2020-06-17  9:53 [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation Jiufei Xue
@ 2020-06-17  9:53 ` Jiufei Xue
  2020-06-17 15:15   ` Jens Axboe
  2020-06-17  9:53 ` [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior Jiufei Xue
  1 sibling, 1 reply; 4+ messages in thread
From: Jiufei Xue @ 2020-06-17  9:53 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, joseph.qi

Poll events should be 32 bits wide to cover EPOLLEXCLUSIVE.

Explicitly word-swap poll32_events on big endian to make sure the ABI
is not changed.  We call this feature IORING_FEAT_POLL_32BITS;
applications that want to use EPOLLEXCLUSIVE should check the feature bit
first.

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
---
 fs/io_uring.c                 | 13 +++++++++----
 include/uapi/linux/io_uring.h |  4 +++-
 tools/io_uring/liburing.h     |  6 +++++-
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 155f3d8..fe935cf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4543,7 +4543,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_poll_iocb *poll = &req->poll;
-	u16 events;
+	u32 events;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
@@ -4552,7 +4552,10 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	if (!poll->file)
 		return -EBADF;
 
-	events = READ_ONCE(sqe->poll_events);
+	events = READ_ONCE(sqe->poll32_events);
+#ifdef __BIG_ENDIAN
+	events = swahw32(events);
+#endif
 	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
 
 	get_task_struct(current);
@@ -7865,7 +7868,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
-			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
+			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
+			IORING_FEAT_POLL_32BITS;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
@@ -8154,7 +8158,8 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(28, /* compat */   int, rw_flags);
 	BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
 	BUILD_BUG_SQE_ELEM(28, __u32,  fsync_flags);
-	BUILD_BUG_SQE_ELEM(28, __u16,  poll_events);
+	BUILD_BUG_SQE_ELEM(28, /* compat */ __u16,  poll_events);
+	BUILD_BUG_SQE_ELEM(28, __u32,  poll32_events);
 	BUILD_BUG_SQE_ELEM(28, __u32,  sync_range_flags);
 	BUILD_BUG_SQE_ELEM(28, __u32,  msg_flags);
 	BUILD_BUG_SQE_ELEM(28, __u32,  timeout_flags);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 92c2269..8d03396 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -31,7 +31,8 @@ struct io_uring_sqe {
 	union {
 		__kernel_rwf_t	rw_flags;
 		__u32		fsync_flags;
-		__u16		poll_events;
+		__u16		poll_events;	/* compatibility */
+		__u32		poll32_events;	/* word-reversed for BE */
 		__u32		sync_range_flags;
 		__u32		msg_flags;
 		__u32		timeout_flags;
@@ -248,6 +249,7 @@ struct io_uring_params {
 #define IORING_FEAT_RW_CUR_POS		(1U << 3)
 #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
 #define IORING_FEAT_FAST_POLL		(1U << 5)
+#define IORING_FEAT_POLL_32BITS 	(1U << 6)
 
 /*
  * io_uring_register(2) opcodes and arguments
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index 5f305c8..28a837b 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -10,6 +10,7 @@
 #include <string.h>
 #include "../../include/uapi/linux/io_uring.h"
 #include <inttypes.h>
+#include <linux/swab.h>
 #include "barrier.h"
 
 /*
@@ -145,11 +146,14 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
 }
 
 static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
-					  short poll_mask)
+					  unsigned poll_mask)
 {
 	memset(sqe, 0, sizeof(*sqe));
 	sqe->opcode = IORING_OP_POLL_ADD;
 	sqe->fd = fd;
+#if __BYTE_ORDER == __BIG_ENDIAN
+	poll_mask = __swahw32(poll_mask);
+#endif
 	sqe->poll_events = poll_mask;
 }
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior
  2020-06-17  9:53 [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation Jiufei Xue
  2020-06-17  9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
@ 2020-06-17  9:53 ` Jiufei Xue
  1 sibling, 0 replies; 4+ messages in thread
From: Jiufei Xue @ 2020-06-17  9:53 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, joseph.qi

Applications can pass this flag in to avoid a thundering herd on accept().

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
---
 fs/io_uring.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fe935cf..f156eba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4225,7 +4225,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 
 	pt->error = 0;
 	poll->head = head;
-	add_wait_queue(head, &poll->wait);
+
+	if (poll->events & EPOLLEXCLUSIVE)
+		add_wait_queue_exclusive(head, &poll->wait);
+	else
+		add_wait_queue(head, &poll->wait);
 }
 
 static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
@@ -4556,7 +4560,8 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 #ifdef __BIG_ENDIAN
 	events = swahw32(events);
 #endif
-	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP |
+		       (events & EPOLLEXCLUSIVE);
 
 	get_task_struct(current);
 	req->task = current;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v4 1/2] io_uring: change the poll type to be 32-bits
  2020-06-17  9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
@ 2020-06-17 15:15   ` Jens Axboe
  0 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2020-06-17 15:15 UTC (permalink / raw)
  To: Jiufei Xue, io-uring; +Cc: joseph.qi

On 6/17/20 3:53 AM, Jiufei Xue wrote:
> poll events should be 32-bits to cover EPOLLEXCLUSIVE.
> 
> Explicit word-swap the poll32_events for big endian to make sure the ABI
> is not changed.  We call this feature IORING_FEAT_POLL_32BITS,
> applications who want to use EPOLLEXCLUSIVE should check the feature bit
> first.
> 
> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
> ---
>  fs/io_uring.c                 | 13 +++++++++----
>  include/uapi/linux/io_uring.h |  4 +++-
>  tools/io_uring/liburing.h     |  6 +++++-
>  3 files changed, 17 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 155f3d8..fe935cf 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -4543,7 +4543,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
>  static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  {
>  	struct io_poll_iocb *poll = &req->poll;
> -	u16 events;
> +	u32 events;
>  
>  	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
>  		return -EINVAL;
> @@ -4552,7 +4552,10 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
>  	if (!poll->file)
>  		return -EBADF;
>  
> -	events = READ_ONCE(sqe->poll_events);
> +	events = READ_ONCE(sqe->poll32_events);
> +#ifdef __BIG_ENDIAN
> +	events = swahw32(events);
> +#endif
>  	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
>  
>  	get_task_struct(current);
> @@ -7865,7 +7868,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
>  
>  	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
>  			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
> -			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
> +			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
> +			IORING_FEAT_POLL_32BITS;
>  
>  	if (copy_to_user(params, p, sizeof(*p))) {
>  		ret = -EFAULT;
> @@ -8154,7 +8158,8 @@ static int __init io_uring_init(void)
>  	BUILD_BUG_SQE_ELEM(28, /* compat */   int, rw_flags);
>  	BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
>  	BUILD_BUG_SQE_ELEM(28, __u32,  fsync_flags);
> -	BUILD_BUG_SQE_ELEM(28, __u16,  poll_events);
> +	BUILD_BUG_SQE_ELEM(28, /* compat */ __u16,  poll_events);
> +	BUILD_BUG_SQE_ELEM(28, __u32,  poll32_events);
>  	BUILD_BUG_SQE_ELEM(28, __u32,  sync_range_flags);
>  	BUILD_BUG_SQE_ELEM(28, __u32,  msg_flags);
>  	BUILD_BUG_SQE_ELEM(28, __u32,  timeout_flags);
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 92c2269..8d03396 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -31,7 +31,8 @@ struct io_uring_sqe {
>  	union {
>  		__kernel_rwf_t	rw_flags;
>  		__u32		fsync_flags;
> -		__u16		poll_events;
> +		__u16		poll_events;	/* compatibility */
> +		__u32		poll32_events;	/* word-reversed for BE */
>  		__u32		sync_range_flags;
>  		__u32		msg_flags;
>  		__u32		timeout_flags;
> @@ -248,6 +249,7 @@ struct io_uring_params {
>  #define IORING_FEAT_RW_CUR_POS		(1U << 3)
>  #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
>  #define IORING_FEAT_FAST_POLL		(1U << 5)
> +#define IORING_FEAT_POLL_32BITS 	(1U << 6)
>  
>  /*
>   * io_uring_register(2) opcodes and arguments
> diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
> index 5f305c8..28a837b 100644
> --- a/tools/io_uring/liburing.h
> +++ b/tools/io_uring/liburing.h
> @@ -10,6 +10,7 @@
>  #include <string.h>
>  #include "../../include/uapi/linux/io_uring.h"
>  #include <inttypes.h>
> +#include <linux/swab.h>
>  #include "barrier.h"
>  
>  /*
> @@ -145,11 +146,14 @@ static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
>  }
>  
>  static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
> -					  short poll_mask)
> +					  unsigned poll_mask)
>  {
>  	memset(sqe, 0, sizeof(*sqe));
>  	sqe->opcode = IORING_OP_POLL_ADD;
>  	sqe->fd = fd;
> +#if __BYTE_ORDER == __BIG_ENDIAN
> +	poll_mask = __swahw32(poll_mask);
> +#endif
>  	sqe->poll_events = poll_mask;

This looks good to me now, but this one need not use the __ version, it
should just use the regular one as that's the one defined in the
non-uapi header.  But I'll just make that change, won't functionally do
anything.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-06-17 15:15 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-17  9:53 [PATCH v4 0/2] io_uring: add EPOLLEXCLUSIVE flag for POLL_ADD operation Jiufei Xue
2020-06-17  9:53 ` [PATCH v4 1/2] io_uring: change the poll type to be 32-bits Jiufei Xue
2020-06-17 15:15   ` Jens Axboe
2020-06-17  9:53 ` [PATCH v4 2/2] io_uring: use EPOLLEXCLUSIVE flag to avoid thundering herd type behavior Jiufei Xue

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).