linux-kernel.vger.kernel.org archive mirror
* [PATCH 1/2] fs/aio: Use kmap_local() instead of kmap()
@ 2023-01-18 15:26 Kent Overstreet
  2023-01-18 15:26 ` [PATCH 2/2] fs/aio: obey min_nr when doing wakeups Kent Overstreet
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Kent Overstreet @ 2023-01-18 15:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: Kent Overstreet, linux-aio, linux-fsdevel

Originally, we used kmap() instead of kmap_atomic() for reading events
out of the completion ringbuffer because we're using copy_to_user(),
which can fault.

Now that kmap_local() is a thing, use that instead.
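
For context, a sketch of why kmap_atomic() was never an option here
(illustrative only, not part of the patch):

	ev = kmap_atomic(page);		/* runs with pagefaults disabled */
	/* copy_to_user() cannot fault in user pages while pagefaults are
	 * disabled; a partial copy would need a slow retry path */
	copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail);
	kunmap_atomic(ev);

kmap_local_page() keeps the mapping CPU-local without disabling
pagefaults, so copy_to_user() can fault and sleep as usual.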

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: linux-aio@kvack.org
Cc: linux-fsdevel@vger.kernel.org
---
 fs/aio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 5b2ff20ad3..3f795ed2a2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1246,10 +1246,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
 		avail = min(avail, nr - ret);
 		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
 
-		ev = kmap(page);
+		ev = kmap_local_page(page);
 		copy_ret = copy_to_user(event + ret, ev + pos,
 					sizeof(*ev) * avail);
-		kunmap(page);
+		kunmap_local(ev);
 
 		if (unlikely(copy_ret)) {
 			ret = -EFAULT;
-- 
2.39.0


* [PATCH 2/2] fs/aio: obey min_nr when doing wakeups
  2023-01-18 15:26 [PATCH 1/2] fs/aio: Use kmap_local() instead of kmap() Kent Overstreet
@ 2023-01-18 15:26 ` Kent Overstreet
  2023-01-18 18:19 ` [PATCH 1/2] fs/aio: Use kmap_local() instead of kmap() Jeff Moyer
  2023-01-20 14:03 ` [PATCH v2] fs/aio: obey min_nr when doing wakeups Kent Overstreet
  2 siblings, 0 replies; 7+ messages in thread
From: Kent Overstreet @ 2023-01-18 15:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: Kent Overstreet, linux-aio, linux-fsdevel

I've been observing workloads where IPIs due to wakeups in
aio_complete() are ~15% of total CPU time in the profile. Most of those
wakeups are unnecessary when completion batching is in use in
io_getevents().

This plumbs min_nr through via the wait queue entry, so that aio_complete()
can avoid doing unnecessary wakeups.
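
For illustration, the batching this targets looks roughly like the
following from userspace (a sketch; ioctx is assumed to have been set
up with io_setup(), not part of the patch):

	#include <libaio.h>

	struct io_event events[64];

	/* Ask for at least 16 completions in one call: with this patch,
	 * aio_complete() skips the wakeup (and the IPI behind it) until
	 * at least 16 events are available in the ring. */
	int ret = io_getevents(ioctx, 16, 64, events, NULL);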

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: linux-aio@kvack.org
Cc: linux-fsdevel@vger.kernel.org
---
 fs/aio.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 3f795ed2a2..a03bc93016 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1105,6 +1105,11 @@ static inline void iocb_destroy(struct aio_kiocb *iocb)
 	kmem_cache_free(kiocb_cachep, iocb);
 }
 
+struct aio_waiter {
+	struct wait_queue_entry	w;
+	size_t			min_nr;
+};
+
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
@@ -1113,7 +1118,7 @@ static void aio_complete(struct aio_kiocb *iocb)
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
-	unsigned tail, pos, head;
+	unsigned tail, pos, head, avail;
 	unsigned long	flags;
 
 	/*
@@ -1157,6 +1162,10 @@ static void aio_complete(struct aio_kiocb *iocb)
 	ctx->completed_events++;
 	if (ctx->completed_events > 1)
 		refill_reqs_available(ctx, head, tail);
+
+	avail = tail > head
+		? tail - head
+		: tail + ctx->nr_events - head;
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1177,8 +1186,18 @@ static void aio_complete(struct aio_kiocb *iocb)
 	 */
 	smp_mb();
 
-	if (waitqueue_active(&ctx->wait))
-		wake_up(&ctx->wait);
+	if (waitqueue_active(&ctx->wait)) {
+		struct aio_waiter *curr, *next;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ctx->wait.lock, flags);
+		list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
+			if (avail >= curr->min_nr) {
+				list_del_init_careful(&curr->w.entry);
+				wake_up_process(curr->w.private);
+			}
+		spin_unlock_irqrestore(&ctx->wait.lock, flags);
+	}
 }
 
 static inline void iocb_put(struct aio_kiocb *iocb)
@@ -1294,7 +1313,9 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 			struct io_event __user *event,
 			ktime_t until)
 {
-	long ret = 0;
+	struct hrtimer_sleeper	t;
+	struct aio_waiter	w;
+	long ret = 0, ret2 = 0;
 
 	/*
 	 * Note that aio_read_events() is being called as the conditional - i.e.
@@ -1310,12 +1331,34 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 	 * the ringbuffer empty. So in practice we should be ok, but it's
 	 * something to be aware of when touching this code.
 	 */
-	if (until == 0)
-		aio_read_events(ctx, min_nr, nr, event, &ret);
-	else
-		wait_event_interruptible_hrtimeout(ctx->wait,
-				aio_read_events(ctx, min_nr, nr, event, &ret),
-				until);
+	aio_read_events(ctx, min_nr, nr, event, &ret);
+	if (until == 0 || ret < 0 || ret >= min_nr)
+		return ret;
+
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	if (until != KTIME_MAX) {
+		hrtimer_set_expires_range_ns(&t.timer, until, current->timer_slack_ns);
+		hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
+	}
+
+	init_wait(&w.w);
+
+	while (1) {
+		w.min_nr = min_nr - ret;
+
+		ret2 = prepare_to_wait_event(&ctx->wait, &w.w, TASK_INTERRUPTIBLE) ?:
+			!t.task ? -ETIME : 0;
+
+		if (aio_read_events(ctx, min_nr, nr, event, &ret) || ret2)
+			break;
+
+		schedule();
+	}
+
+	finish_wait(&ctx->wait, &w.w);
+	hrtimer_cancel(&t.timer);
+	destroy_hrtimer_on_stack(&t.timer);
+
 	return ret;
 }
 
-- 
2.39.0


* Re: [PATCH 1/2] fs/aio: Use kmap_local() instead of kmap()
  2023-01-18 15:26 [PATCH 1/2] fs/aio: Use kmap_local() instead of kmap() Kent Overstreet
  2023-01-18 15:26 ` [PATCH 2/2] fs/aio: obey min_nr when doing wakeups Kent Overstreet
@ 2023-01-18 18:19 ` Jeff Moyer
  2023-01-20 14:03 ` [PATCH v2] fs/aio: obey min_nr when doing wakeups Kent Overstreet
  2 siblings, 0 replies; 7+ messages in thread
From: Jeff Moyer @ 2023-01-18 18:19 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-kernel, linux-aio, linux-fsdevel, fmdefrancesco

Hi, Kent,

Kent Overstreet <kent.overstreet@linux.dev> writes:

> Originally, we used kmap() instead of kmap_atomic() for reading events
> out of the completion ringbuffer because we're using copy_to_user(),
> which can fault.
>
> Now that kmap_local() is a thing, use that instead.

This has already been proposed as part of a more comprehensive patch:
  https://lore.kernel.org/linux-fsdevel/20230109175629.9482-1-fmdefrancesco@gmail.com/

Would you be willing to review that one?

Thanks!
Jeff

>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> Cc: Benjamin LaHaise <bcrl@kvack.org>
> Cc: linux-aio@kvack.org
> Cc: linux-fsdevel@vger.kernel.org
> ---
>  fs/aio.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 5b2ff20ad3..3f795ed2a2 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1246,10 +1246,10 @@ static long aio_read_events_ring(struct kioctx *ctx,
>  		avail = min(avail, nr - ret);
>  		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
>  
> -		ev = kmap(page);
> +		ev = kmap_local_page(page);
>  		copy_ret = copy_to_user(event + ret, ev + pos,
>  					sizeof(*ev) * avail);
> -		kunmap(page);
> +		kunmap_local(ev);
>  
>  		if (unlikely(copy_ret)) {
>  			ret = -EFAULT;


* [PATCH v2] fs/aio: obey min_nr when doing wakeups
  2023-01-18 15:26 [PATCH 1/2] fs/aio: Use kmap_local() instead of kmap() Kent Overstreet
  2023-01-18 15:26 ` [PATCH 2/2] fs/aio: obey min_nr when doing wakeups Kent Overstreet
  2023-01-18 18:19 ` [PATCH 1/2] fs/aio: Use kmap_local() instead of kmap() Jeff Moyer
@ 2023-01-20 14:03 ` Kent Overstreet
  2023-01-20 19:47   ` Jeff Moyer
  2 siblings, 1 reply; 7+ messages in thread
From: Kent Overstreet @ 2023-01-20 14:03 UTC (permalink / raw)
  To: linux-kernel; +Cc: Kent Overstreet, linux-aio, linux-fsdevel

I've been observing workloads where IPIs due to wakeups in
aio_complete() are ~15% of total CPU time in the profile. Most of those
wakeups are unnecessary when completion batching is in use in
io_getevents().

This plumbs min_nr through via the wait queue entry, so that aio_complete()
can avoid doing unnecessary wakeups.

v2: This fixes a race in the first version of the patch. If we read some
events out after adding to the waitlist, we need to update wait.min_nr and
call prepare_to_wait_event() again before scheduling.
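
Roughly, the missed-wakeup window in v1 looked like this (a
reconstruction from the description above, not part of the patch):

	/*
	 * waiter					completer
	 * w.min_nr = min_nr - ret
	 * prepare_to_wait_event()
	 * aio_read_events() reads more, ret grows
	 * schedule()				aio_complete():
	 *						avail covers the remaining
	 *						need, but avail < stale
	 *						w.min_nr: wakeup skipped
	 */

v2 recomputes w.min_nr and re-arms the wait entry on every pass, and
only calls schedule() when a pass read nothing new.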

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: linux-aio@kvack.org
Cc: linux-fsdevel@vger.kernel.org
---
 fs/aio.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 56 insertions(+), 10 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 3f795ed2a2..5be35cb8ec 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1105,6 +1105,11 @@ static inline void iocb_destroy(struct aio_kiocb *iocb)
 	kmem_cache_free(kiocb_cachep, iocb);
 }
 
+struct aio_waiter {
+	struct wait_queue_entry	w;
+	size_t			min_nr;
+};
+
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
@@ -1113,7 +1118,7 @@ static void aio_complete(struct aio_kiocb *iocb)
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
-	unsigned tail, pos, head;
+	unsigned tail, pos, head, avail;
 	unsigned long	flags;
 
 	/*
@@ -1157,6 +1162,10 @@ static void aio_complete(struct aio_kiocb *iocb)
 	ctx->completed_events++;
 	if (ctx->completed_events > 1)
 		refill_reqs_available(ctx, head, tail);
+
+	avail = tail > head
+		? tail - head
+		: tail + ctx->nr_events - head;
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1177,8 +1186,18 @@ static void aio_complete(struct aio_kiocb *iocb)
 	 */
 	smp_mb();
 
-	if (waitqueue_active(&ctx->wait))
-		wake_up(&ctx->wait);
+	if (waitqueue_active(&ctx->wait)) {
+		struct aio_waiter *curr, *next;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ctx->wait.lock, flags);
+		list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
+			if (avail >= curr->min_nr) {
+				list_del_init_careful(&curr->w.entry);
+				wake_up_process(curr->w.private);
+			}
+		spin_unlock_irqrestore(&ctx->wait.lock, flags);
+	}
 }
 
 static inline void iocb_put(struct aio_kiocb *iocb)
@@ -1294,7 +1313,9 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 			struct io_event __user *event,
 			ktime_t until)
 {
-	long ret = 0;
+	struct hrtimer_sleeper	t;
+	struct aio_waiter	w;
+	long ret = 0, ret2 = 0;
 
 	/*
 	 * Note that aio_read_events() is being called as the conditional - i.e.
@@ -1310,12 +1331,37 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
 	 * the ringbuffer empty. So in practice we should be ok, but it's
 	 * something to be aware of when touching this code.
 	 */
-	if (until == 0)
-		aio_read_events(ctx, min_nr, nr, event, &ret);
-	else
-		wait_event_interruptible_hrtimeout(ctx->wait,
-				aio_read_events(ctx, min_nr, nr, event, &ret),
-				until);
+	aio_read_events(ctx, min_nr, nr, event, &ret);
+	if (until == 0 || ret < 0 || ret >= min_nr)
+		return ret;
+
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	if (until != KTIME_MAX) {
+		hrtimer_set_expires_range_ns(&t.timer, until, current->timer_slack_ns);
+		hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
+	}
+
+	init_wait(&w.w);
+
+	while (1) {
+		unsigned long nr_got = ret;
+
+		w.min_nr = min_nr - ret;
+
+		ret2 = prepare_to_wait_event(&ctx->wait, &w.w, TASK_INTERRUPTIBLE) ?:
+			!t.task ? -ETIME : 0;
+
+		if (aio_read_events(ctx, min_nr, nr, event, &ret) || ret2)
+			break;
+
+		if (nr_got == ret)
+			schedule();
+	}
+
+	finish_wait(&ctx->wait, &w.w);
+	hrtimer_cancel(&t.timer);
+	destroy_hrtimer_on_stack(&t.timer);
+
 	return ret;
 }
 
-- 
2.39.0


* Re: [PATCH v2] fs/aio: obey min_nr when doing wakeups
  2023-01-20 14:03 ` [PATCH v2] fs/aio: obey min_nr when doing wakeups Kent Overstreet
@ 2023-01-20 19:47   ` Jeff Moyer
  2023-01-23 16:17     ` Jeff Moyer
  0 siblings, 1 reply; 7+ messages in thread
From: Jeff Moyer @ 2023-01-20 19:47 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-kernel, linux-aio, linux-fsdevel

Hi, Kent,

Kent Overstreet <kent.overstreet@linux.dev> writes:

> I've been observing workloads where IPIs due to wakeups in
> aio_complete() are ~15% of total CPU time in the profile. Most of those
> wakeups are unnecessary when completion batching is in use in
> io_getevents().
>
> This plumbs min_nr through via the wait queue entry, so that aio_complete()
> can avoid doing unnecessary wakeups.
>
> v2: This fixes a race in the first version of the patch. If we read some
> events out after adding to the waitlist, we need to update wait.min_nr and
> call prepare_to_wait_event() again before scheduling.

I like the idea of the patch, and I'll get some real world performance
numbers soon.  But first, this version (and the previous version as
well) fails test case 23 in the libaio regression test suite:

Starting cases/23.p
FAIL: poll missed an event!
FAIL: poll missed an event!
test cases/23.t completed FAILED.

I started to look into it, but didn't see anything obvious yet.  My test
kernel has the kmap_local patch applied as well, fyi.

Thanks!
Jeff

>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> Cc: Benjamin LaHaise <bcrl@kvack.org>
> Cc: linux-aio@kvack.org
> Cc: linux-fsdevel@vger.kernel.org
> ---
>  fs/aio.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 56 insertions(+), 10 deletions(-)
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 3f795ed2a2..5be35cb8ec 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1105,6 +1105,11 @@ static inline void iocb_destroy(struct aio_kiocb *iocb)
>  	kmem_cache_free(kiocb_cachep, iocb);
>  }
>  
> +struct aio_waiter {
> +	struct wait_queue_entry	w;
> +	size_t			min_nr;
> +};
> +
>  /* aio_complete
>   *	Called when the io request on the given iocb is complete.
>   */
> @@ -1113,7 +1118,7 @@ static void aio_complete(struct aio_kiocb *iocb)
>  	struct kioctx	*ctx = iocb->ki_ctx;
>  	struct aio_ring	*ring;
>  	struct io_event	*ev_page, *event;
> -	unsigned tail, pos, head;
> +	unsigned tail, pos, head, avail;
>  	unsigned long	flags;
>  
>  	/*
> @@ -1157,6 +1162,10 @@ static void aio_complete(struct aio_kiocb *iocb)
>  	ctx->completed_events++;
>  	if (ctx->completed_events > 1)
>  		refill_reqs_available(ctx, head, tail);
> +
> +	avail = tail > head
> +		? tail - head
> +		: tail + ctx->nr_events - head;
>  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
>  
>  	pr_debug("added to ring %p at [%u]\n", iocb, tail);
> @@ -1177,8 +1186,18 @@ static void aio_complete(struct aio_kiocb *iocb)
>  	 */
>  	smp_mb();
>  
> -	if (waitqueue_active(&ctx->wait))
> -		wake_up(&ctx->wait);
> +	if (waitqueue_active(&ctx->wait)) {
> +		struct aio_waiter *curr, *next;
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&ctx->wait.lock, flags);
> +		list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
> +			if (avail >= curr->min_nr) {
> +				list_del_init_careful(&curr->w.entry);
> +				wake_up_process(curr->w.private);
> +			}
> +		spin_unlock_irqrestore(&ctx->wait.lock, flags);
> +	}
>  }
>  
>  static inline void iocb_put(struct aio_kiocb *iocb)
> @@ -1294,7 +1313,9 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
>  			struct io_event __user *event,
>  			ktime_t until)
>  {
> -	long ret = 0;
> +	struct hrtimer_sleeper	t;
> +	struct aio_waiter	w;
> +	long ret = 0, ret2 = 0;
>  
>  	/*
>  	 * Note that aio_read_events() is being called as the conditional - i.e.
> @@ -1310,12 +1331,37 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
>  	 * the ringbuffer empty. So in practice we should be ok, but it's
>  	 * something to be aware of when touching this code.
>  	 */
> -	if (until == 0)
> -		aio_read_events(ctx, min_nr, nr, event, &ret);
> -	else
> -		wait_event_interruptible_hrtimeout(ctx->wait,
> -				aio_read_events(ctx, min_nr, nr, event, &ret),
> -				until);
> +	aio_read_events(ctx, min_nr, nr, event, &ret);
> +	if (until == 0 || ret < 0 || ret >= min_nr)
> +		return ret;
> +
> +	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	if (until != KTIME_MAX) {
> +		hrtimer_set_expires_range_ns(&t.timer, until, current->timer_slack_ns);
> +		hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
> +	}
> +
> +	init_wait(&w.w);
> +
> +	while (1) {
> +		unsigned long nr_got = ret;
> +
> +		w.min_nr = min_nr - ret;
> +
> +		ret2 = prepare_to_wait_event(&ctx->wait, &w.w, TASK_INTERRUPTIBLE) ?:
> +			!t.task ? -ETIME : 0;
> +
> +		if (aio_read_events(ctx, min_nr, nr, event, &ret) || ret2)
> +			break;
> +
> +		if (nr_got == ret)
> +			schedule();
> +	}
> +
> +	finish_wait(&ctx->wait, &w.w);
> +	hrtimer_cancel(&t.timer);
> +	destroy_hrtimer_on_stack(&t.timer);
> +
>  	return ret;
>  }


* Re: [PATCH v2] fs/aio: obey min_nr when doing wakeups
  2023-01-20 19:47   ` Jeff Moyer
@ 2023-01-23 16:17     ` Jeff Moyer
  2023-01-23 19:54       ` Kent Overstreet
  0 siblings, 1 reply; 7+ messages in thread
From: Jeff Moyer @ 2023-01-23 16:17 UTC (permalink / raw)
  To: Kent Overstreet; +Cc: linux-kernel, linux-aio, linux-fsdevel

Jeff Moyer <jmoyer@redhat.com> writes:

> Hi, Kent,
>
> Kent Overstreet <kent.overstreet@linux.dev> writes:
>
>> I've been observing workloads where IPIs due to wakeups in
>> aio_complete() are ~15% of total CPU time in the profile. Most of those
>> wakeups are unnecessary when completion batching is in use in
>> io_getevents().
>>
>> This plumbs min_nr through via the wait queue entry, so that aio_complete()
>> can avoid doing unnecessary wakeups.
>>
>> v2: This fixes a race in the first version of the patch. If we read some
>> events out after adding to the waitlist, we need to update wait.min_nr and
>> call prepare_to_wait_event() again before scheduling.
>
> I like the idea of the patch, and I'll get some real world performance
> numbers soon.  But first, this version (and the previous version as
> well) fails test case 23 in the libaio regression test suite:
>
> Starting cases/23.p
> FAIL: poll missed an event!
> FAIL: poll missed an event!
> test cases/23.t completed FAILED.

It turns out that this only fails on the (relatively) old kernel against
which I applied the patches.  When I apply both patches to the latest
tree, there is no test failure.

Sorry for the noise, I'll be sure to test on the latest going forward.
Now to figure out what changed elsewhere to fix this....

Cheers,
Jeff


* Re: [PATCH v2] fs/aio: obey min_nr when doing wakeups
  2023-01-23 16:17     ` Jeff Moyer
@ 2023-01-23 19:54       ` Kent Overstreet
  0 siblings, 0 replies; 7+ messages in thread
From: Kent Overstreet @ 2023-01-23 19:54 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: linux-kernel, linux-aio, linux-fsdevel

On Mon, Jan 23, 2023 at 11:17:53AM -0500, Jeff Moyer wrote:
> Jeff Moyer <jmoyer@redhat.com> writes:
> 
> > Hi, Kent,
> >
> > Kent Overstreet <kent.overstreet@linux.dev> writes:
> >
> >> I've been observing workloads where IPIs due to wakeups in
> >> aio_complete() are ~15% of total CPU time in the profile. Most of those
> >> wakeups are unnecessary when completion batching is in use in
> >> io_getevents().
> >>
>> This plumbs min_nr through via the wait queue entry, so that aio_complete()
> >> can avoid doing unnecessary wakeups.
> >>
> >> v2: This fixes a race in the first version of the patch. If we read some
>> events out after adding to the waitlist, we need to update wait.min_nr and
> >> call prepare_to_wait_event() again before scheduling.
> >
> > I like the idea of the patch, and I'll get some real world performance
> > numbers soon.  But first, this version (and the previous version as
> > well) fails test case 23 in the libaio regression test suite:
> >
> > Starting cases/23.p
> > FAIL: poll missed an event!
> > FAIL: poll missed an event!
> > test cases/23.t completed FAILED.
> 
> It turns out that this only fails on the (relatively) old kernel against
> which I applied the patches.  When I apply both patches to the latest
> tree, there is no test failure.
> 
> Sorry for the noise, I'll be sure to test on the latest going forward.
> Now to figure out what changed elsewhere to fix this....

That's odd - let me know if you'd like me to take a look...
