* [RFC v0] Use swait in completion
@ 2016-03-08 15:59 Daniel Wagner
From: Daniel Wagner @ 2016-03-08 15:59 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Peter Zijlstra (Intel),
	Thomas Gleixner, Sebastian Andrzej Siewior, Daniel Wagner

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

Hi,

As Peter correctly pointed out in [1], a simple conversion from
wait to swait in completion.c won't work. I played around a bit and
came up with this rather ugly idea.

So in case complete_all() is called from hard irq context, we wake up
only one waiter and let that one call swake_up_all(). For this I needed
to somehow transfer this information from complete_all() to
wait_for_completion(). The only working idea I found was to introduce
a new flag in struct completion. Ideas to overcome this problem
are highly appreciated.
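
In rough form the idea looks like this (just a sketch; the actual
patch follows in the next mail):

void complete_all(struct completion *x)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&x->wait.lock, flags);
	x->done += UINT_MAX/2;
	raw_spin_unlock_irqrestore(&x->wait.lock, flags);

	if (in_irq()) {
		/* hard irq context: wake only one waiter and let it
		 * do the swake_up_all() once it runs */
		x->flags = COMPLETION_DEFER;
		swake_up(&x->wait);
	} else {
		swake_up_all(&x->wait);
	}
}

The woken waiter then checks COMPLETION_DEFER in __wait_for_common()
and performs the deferred swake_up_all() from process context.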

I also did some performance measurements with the test program below.
The test creates a trigger thread and a bunch of waiter threads. The
trigger thread calls complete_all() either from thread context or from
hard irq context. The time (in seconds) needed for 1000 iterations was
measured. This was done on an idle IvyBridge machine with 64 logical
cores (E5-4610).

waiter_nr: number of waiter threads
irqwork:   0 = complete_all() from thread context, 1 = complete_all() from irq_work()

wait:
waiter_nr 64
irqwork 0

count    66.000000
mean      0.378318
std       0.018468
min       0.344000
25%       0.364000
50%       0.382500
75%       0.395000
max       0.407000


swait:
waiter_nr 64
irqwork 1

count    86.000000
mean      0.315221
std       0.007115
min       0.291000
25%       0.312000
50%       0.316500
75%       0.320000
max       0.329000

swait:
waiter_nr 64
irqwork 0

count    81.000000
mean      0.344642
std       0.021708
min       0.294000
25%       0.336000
50%       0.341000
75%       0.355000
max       0.403000

cheers,
daniel


completion-test.c:

#include <linux/module.h>
#include <linux/wait.h>
#include <linux/swait.h>
#include <linux/kthread.h>
#include <linux/threads.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/completion.h>
#include <linux/irq_work.h>

static unsigned int waiter_nr = 5;
static bool irqwork = true;
static unsigned int counter = 1000;

module_param(waiter_nr, uint,
		S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
MODULE_PARM_DESC(waiter_nr, "Number of waiter threads");

module_param(irqwork, bool,
		S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
MODULE_PARM_DESC(irqwork, "irqwork");

module_param(counter, uint,
		S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
MODULE_PARM_DESC(counter, "counter");

struct completion_test {
	/* We need two completions to avoid a race with reinit of the
	 * completion.
	 */
	struct completion sync_stage1;
	struct completion sync_stage2;

	wait_queue_head_t wq_stage1;
	wait_queue_head_t wq_stage2;

	atomic_t cnt_stage1;
	atomic_t cnt_stage2;

	struct irq_work irq_work;
};

static struct completion_test test_data;
static struct task_struct **waiter_tasks;
static struct task_struct *trigger_task;

static void trigger_irq(struct irq_work *arg)
{
	struct completion_test *ct =
		container_of(arg, struct completion_test, irq_work);

	complete_all(&ct->sync_stage1);
}

/*
 * Waiter thread: checks in at a stage by incrementing cnt_stageN and
 * waking the trigger via wq_stageN, then blocks on the corresponding
 * completion until the trigger releases all waiters via complete_all().
 */
static int waiter(void *arg)
{
	struct completion_test *ct = arg;

	for (;;) {
		atomic_inc(&ct->cnt_stage1);
		wake_up(&ct->wq_stage1);
		wait_for_completion_interruptible(&ct->sync_stage1);
		if (kthread_should_stop())
			break;

		atomic_inc(&ct->cnt_stage2);
		wake_up(&ct->wq_stage2);
		wait_for_completion_interruptible(&ct->sync_stage2);
		if (kthread_should_stop())
			break;
	}
	return 0;
}

/*
 * Trigger thread: waits until all waiters have checked in for a stage,
 * re-arms the other stage and then releases the waiters with
 * complete_all(). For stage 1 this happens either directly or from
 * hard irq context via irq_work, depending on the irqwork parameter.
 * The elapsed time is printed for every 'counter' iterations.
 */
static int trigger(void *arg)
{
	struct completion_test *ct = arg;
	struct timespec ts_start, ts;
	unsigned long cnt;

	cnt = counter;
	ts_start = current_kernel_time();

	for (;;) {
		cnt--;
		if (cnt == 0) {
			ts = timespec_sub(current_kernel_time(), ts_start);
			printk("%ld.%.9ld\n", ts.tv_sec, ts.tv_nsec);

			cnt = counter;
			ts_start = current_kernel_time();
		}

		wait_event_interruptible(ct->wq_stage1,
				!(atomic_read(&ct->cnt_stage1) < waiter_nr));
		if (kthread_should_stop()) {
			complete_all(&ct->sync_stage1);
			break;
		}

		atomic_set(&ct->cnt_stage2, 0);
		reinit_completion(&ct->sync_stage2);

		if (irqwork)
			irq_work_queue(&ct->irq_work);
		else
			complete_all(&ct->sync_stage1);

		wait_event_interruptible(ct->wq_stage2,
				!(atomic_read(&ct->cnt_stage2) < waiter_nr));
		if (kthread_should_stop()) {
			complete_all(&ct->sync_stage2);
			break;
		}

		reinit_completion(&ct->sync_stage1);
		atomic_set(&ct->cnt_stage1, 0);
		complete_all(&ct->sync_stage2);
	}

	return 0;
}

static void __exit completion_test_module_cleanup(void)
{
	unsigned int i;

	if (trigger_task && !IS_ERR(trigger_task))
		kthread_stop(trigger_task);

	if (waiter_tasks) {
		for (i = 0; i < waiter_nr; i++) {
			if (waiter_tasks[i] && !IS_ERR(waiter_tasks[i]))
				kthread_stop(waiter_tasks[i]);

		}
		kfree(waiter_tasks);
	}
}

static int __init completion_test_module_init(void)
{
	struct completion_test *ct = &test_data;
	unsigned int i;
	int err;

	init_completion(&ct->sync_stage1);
	init_completion(&ct->sync_stage2);
	init_waitqueue_head(&ct->wq_stage1);
	init_waitqueue_head(&ct->wq_stage2);
	atomic_set(&ct->cnt_stage1, 0);
	atomic_set(&ct->cnt_stage2, 0);
	init_irq_work(&ct->irq_work, trigger_irq);

	waiter_tasks = kcalloc(waiter_nr, sizeof(waiter_tasks[0]), GFP_KERNEL);
	if (!waiter_tasks) {
		printk("out of memory\n");
		err = -ENOMEM;
		goto unwind;
	}

	for (i = 0; i < waiter_nr; i++) {
		waiter_tasks[i] = kthread_run(waiter, ct, "waiter");
		if (IS_ERR(waiter_tasks[i])) {
			err = PTR_ERR(waiter_tasks[i]);
			goto unwind;
		}
	}

	trigger_task = kthread_run(trigger, ct, "trigger");
	if (IS_ERR(trigger_task)) {
		err = PTR_ERR(trigger_task);
		goto unwind;
	}

	return 0;

unwind:
	completion_test_module_cleanup();
	return err;
}

module_init(completion_test_module_init);
module_exit(completion_test_module_cleanup);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Daniel Wagner");
MODULE_DESCRIPTION("completion test");


[1] http://thread.gmane.org/gmane.linux.kernel/2034867/focus=2034873


Daniel Wagner (1):
  sched/completion: convert completions to use simple wait queues

 include/linux/completion.h | 14 ++++++++++----
 kernel/sched/completion.c  | 41 +++++++++++++++++++++++++----------------
 2 files changed, 35 insertions(+), 20 deletions(-)

-- 
2.5.0


* [RFC v0] sched/completion: convert completions to use simple wait queues
From: Daniel Wagner @ 2016-03-08 15:59 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Peter Zijlstra (Intel),
	Thomas Gleixner, Sebastian Andrzej Siewior, Daniel Wagner

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

Completions have no long lasting callbacks and therefore do not need
the complex waitqueue variant. Use simple waitqueues, which reduces
the contention on the waitqueue lock.

This was a carry forward from v3.10-rt, with some RT-specific chunks
dropped, and updated to align with the names chosen for the mainline
simple waitqueue support.

[wagi: Added flag to defer swake_up_all() from irq context]

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
---
 include/linux/completion.h | 14 ++++++++++----
 kernel/sched/completion.c  | 41 +++++++++++++++++++++++++----------------
 2 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 5d5aaae..a6b2e07 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -8,7 +8,7 @@
  * See kernel/sched/completion.c for details.
  */
 
-#include <linux/wait.h>
+#include <linux/swait.h>
 
 /*
  * struct completion - structure used to maintain state for a "completion"
@@ -22,13 +22,17 @@
  * reinit_completion(), and macros DECLARE_COMPLETION(),
  * DECLARE_COMPLETION_ONSTACK().
  */
+
+#define COMPLETION_DEFER (1 << 0)
+
 struct completion {
+	unsigned int flags;
 	unsigned int done;
-	wait_queue_head_t wait;
+	struct swait_queue_head wait;
 };
 
 #define COMPLETION_INITIALIZER(work) \
-	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+	{ 0, 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
 
 #define COMPLETION_INITIALIZER_ONSTACK(work) \
 	({ init_completion(&work); work; })
@@ -72,8 +76,9 @@ struct completion {
  */
 static inline void init_completion(struct completion *x)
 {
+	x->flags = 0;
 	x->done = 0;
-	init_waitqueue_head(&x->wait);
+	init_swait_queue_head(&x->wait);
 }
 
 /**
@@ -85,6 +90,7 @@ static inline void init_completion(struct completion *x)
  */
 static inline void reinit_completion(struct completion *x)
 {
+	x->flags = 0;
 	x->done = 0;
 }
 
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 8d0f35d..95b08a9 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -30,10 +30,10 @@ void complete(struct completion *x)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&x->wait.lock, flags);
+	raw_spin_lock_irqsave(&x->wait.lock, flags);
 	x->done++;
-	__wake_up_locked(&x->wait, TASK_NORMAL, 1);
-	spin_unlock_irqrestore(&x->wait.lock, flags);
+	swake_up_locked(&x->wait);
+	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete);
 
@@ -50,10 +50,15 @@ void complete_all(struct completion *x)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&x->wait.lock, flags);
+	raw_spin_lock_irqsave(&x->wait.lock, flags);
 	x->done += UINT_MAX/2;
-	__wake_up_locked(&x->wait, TASK_NORMAL, 0);
-	spin_unlock_irqrestore(&x->wait.lock, flags);
+	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+	if (in_irq()) {
+		x->flags = COMPLETION_DEFER;
+		swake_up(&x->wait);
+	} else {
+		swake_up_all(&x->wait);
+	}
 }
 EXPORT_SYMBOL(complete_all);
 
@@ -62,20 +67,20 @@ do_wait_for_common(struct completion *x,
 		   long (*action)(long), long timeout, int state)
 {
 	if (!x->done) {
-		DECLARE_WAITQUEUE(wait, current);
+		DECLARE_SWAITQUEUE(wait);
 
-		__add_wait_queue_tail_exclusive(&x->wait, &wait);
+		__prepare_to_swait(&x->wait, &wait);
 		do {
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
 				break;
 			}
 			__set_current_state(state);
-			spin_unlock_irq(&x->wait.lock);
+			raw_spin_unlock_irq(&x->wait.lock);
 			timeout = action(timeout);
-			spin_lock_irq(&x->wait.lock);
+			raw_spin_lock_irq(&x->wait.lock);
 		} while (!x->done && timeout);
-		__remove_wait_queue(&x->wait, &wait);
+		__finish_swait(&x->wait, &wait);
 		if (!x->done)
 			return timeout;
 	}
@@ -89,9 +94,13 @@ __wait_for_common(struct completion *x,
 {
 	might_sleep();
 
-	spin_lock_irq(&x->wait.lock);
+	raw_spin_lock_irq(&x->wait.lock);
 	timeout = do_wait_for_common(x, action, timeout, state);
-	spin_unlock_irq(&x->wait.lock);
+	raw_spin_unlock_irq(&x->wait.lock);
+	if (x->flags & COMPLETION_DEFER) {
+		x->flags = 0;
+		swake_up_all(&x->wait);
+	}
 	return timeout;
 }
 
@@ -277,12 +286,12 @@ bool try_wait_for_completion(struct completion *x)
 	if (!READ_ONCE(x->done))
 		return 0;
 
-	spin_lock_irqsave(&x->wait.lock, flags);
+	raw_spin_lock_irqsave(&x->wait.lock, flags);
 	if (!x->done)
 		ret = 0;
 	else
 		x->done--;
-	spin_unlock_irqrestore(&x->wait.lock, flags);
+	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(try_wait_for_completion);
@@ -311,7 +320,7 @@ bool completion_done(struct completion *x)
 	 * after it's acquired the lock.
 	 */
 	smp_rmb();
-	spin_unlock_wait(&x->wait.lock);
+	raw_spin_unlock_wait(&x->wait.lock);
 	return true;
 }
 EXPORT_SYMBOL(completion_done);
-- 
2.5.0


* Re: [RFC v0] Use swait in completion
From: Sebastian Andrzej Siewior @ 2016-03-08 17:52 UTC (permalink / raw)
  To: Daniel Wagner
  Cc: linux-kernel, linux-rt-users, Peter Zijlstra (Intel),
	Thomas Gleixner, Daniel Wagner

* Daniel Wagner | 2016-03-08 16:59:13 [+0100]:

>Hi,
Hi,

>As Peter correctly pointed out in [1], a simple conversion from
>wait to swait in completion.c won't work. I played around a bit and
>came up with this rather ugly idea.

Besides all the things I mentioned privately, here is what I have
currently in -RT:

+void swake_up_all_locked(struct swait_queue_head *q)
+{
+       struct swait_queue *curr;
+       int wakes = 0;
+
+       while (!list_empty(&q->task_list)) {
+
+               curr = list_first_entry(&q->task_list, typeof(*curr),
+                                       task_list);
+               wake_up_process(curr->task);
+               list_del_init(&curr->task_list);
+               wakes++;
+       }
+       WARN_ON(wakes > 2);
+}
+EXPORT_SYMBOL(swake_up_all_locked);

The remaining part is what you have. The only user so far is complete(),
and currently I see only complete_all() with zero or one waiter.
If none of my boxes die overnight, I intend to release this
tomorrow in -RT and see if someone else triggers the limit.
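
With that, complete_all() in -RT roughly becomes (only a sketch, not
the exact -RT patch; names as in the mainline swait code):

void complete_all(struct completion *x)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&x->wait.lock, flags);
	x->done += UINT_MAX/2;
	/* wake every waiter while still holding the raw lock,
	 * so no DEFER flag is needed */
	swake_up_all_locked(&x->wait);
	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}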

However, I don't think your DEFER flag solution is all that bad. I
also have block-mq in -RT using swait, and it performs wakeups with
irqs off. Not in -RT but in mainline. So we might need something to make it
work properly. But if we defer the wakeup they might come at us and
complain about the latency…

Sebastian


* Re: [RFC v0] Use swait in completion
From: Daniel Wagner @ 2016-03-08 18:07 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior, Daniel Wagner
  Cc: linux-kernel, linux-rt-users, Peter Zijlstra (Intel), Thomas Gleixner

On 03/08/2016 06:52 PM, Sebastian Andrzej Siewior wrote:
> However, I don't think your DEFER flag solution is all that bad. I
> also have block-mq in -RT using swait, and it performs wakeups with
> irqs off. Not in -RT but in mainline. So we might need something to make it
> work properly. But if we defer the wakeup they might come at us and
> complain about the latency…

I intend to extend the test code to measure the latency diff as well,
just to get a feeling for how bad it is. At least with a lot of waiters
on the completion, the current measurements indicate an improvement. I
guess with a real workload things look quite different.

cheers,
daniel


* Re: [RFC v0] Use swait in completion
From: Josh Cartwright @ 2016-03-08 18:26 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Daniel Wagner, linux-kernel, linux-rt-users,
	Peter Zijlstra (Intel),
	Thomas Gleixner, Daniel Wagner


On Tue, Mar 08, 2016 at 06:52:06PM +0100, Sebastian Andrzej Siewior wrote:
> * Daniel Wagner | 2016-03-08 16:59:13 [+0100]:
> 
> >Hi,
> Hi,
> 
> >As Peter correctly pointed out in [1], a simple conversion from
> >wait to swait in completion.c won't work. I played around a bit and
> >came up with this rather ugly idea.
> 
> Besides all the things I mentioned privately, here is what I have
> currently in -RT:
> 
> +void swake_up_all_locked(struct swait_queue_head *q)
> +{
> +       struct swait_queue *curr;
> +       int wakes = 0;
> +
> +       while (!list_empty(&q->task_list)) {
> +
> +               curr = list_first_entry(&q->task_list, typeof(*curr),
> +                                       task_list);
> +               wake_up_process(curr->task);
> +               list_del_init(&curr->task_list);
> +               wakes++;
> +       }
> +       WARN_ON(wakes > 2);
> +}
> +EXPORT_SYMBOL(swake_up_all_locked);
> 
> The remaining part is what you have. The only user so far is complete(),
> and currently I see only complete_all() with zero or one waiter.
> If none of my boxes die overnight, I intend to release this
> tomorrow in -RT and see if someone else triggers the limit.
> 
> However, I don't think your DEFER flag solution is all that bad. I
> also have block-mq in -RT using swait, and it performs wakeups with
> irqs off. Not in -RT but in mainline. So we might need something to make
> it work properly. But if we defer the wakeup they might come at us and
> complain about the latency…

Is it really just about latency?  Does this deferral not lead to an
inversion in the case where the single woken task isn't the highest
priority waiter on the completion (and doesn't run due to a
middle-priority thing spinning)?

In order for this to work, it seems like the chosen waiter would need to
inherit the highest priority of all waiters (which AFAICT isn't
happening).

  Josh


* Re: [RFC v0] Use swait in completion
From: Sebastian Andrzej Siewior @ 2016-03-09 12:24 UTC (permalink / raw)
  To: Josh Cartwright
  Cc: Daniel Wagner, linux-kernel, linux-rt-users,
	Peter Zijlstra (Intel),
	Thomas Gleixner, Daniel Wagner

* Josh Cartwright | 2016-03-08 12:26:56 [-0600]:

>Is it really just about latency?  Does this deferral not lead to an
>inversion in the case where the single woken task isn't the highest
>priority waiter on the completion (and doesn't run due to a
>middle-priority thing spinning)?

This would be the case, yes. Not only with deferral. Say you have two
waiters: the first one is MID-prio and the second is HI-prio. Currently,
after the wakeup of the MID-prio waiter you get preempted. Waking all of
them at once would put the second waiter first on the CPU.
Same thing without the deferral flag.

>In order for this to work, it seems like the chosen waiter would need to
>inherit the highest priority of all waiters (which AFAICT isn't
>happening).

sorting the waiters by priority? This will be fun. This is only done for
the rtmutex waiters.

>  Josh

Sebastian


* Re: [RFC v0] Use swait in completion
From: Steven Rostedt @ 2016-03-28 18:57 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Josh Cartwright, Daniel Wagner, linux-kernel, linux-rt-users,
	Peter Zijlstra (Intel),
	Thomas Gleixner, Daniel Wagner

On Wed, 9 Mar 2016 13:24:23 +0100
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> * Josh Cartwright | 2016-03-08 12:26:56 [-0600]:
> 
> >Is it really just about latency?  Does this deferral not lead to an
> >inversion in the case where the single woken task isn't the highest
> >priority waiter on the completion (and doesn't run due to a
> >middle-priority thing spinning)?  
> 
> This would be the case, yes. Not only with deferral. Say you have two
> waiters: the first one is MID-prio and the second is HI-prio. Currently,
> after the wakeup of the MID-prio waiter you get preempted. Waking all of
> them at once would put the second waiter first on the CPU.
> Same thing without the deferral flag.
> 
> >In order for this to work, it seems like the chosen waiter would need to
> >inherit the highest priority of all waiters (which AFAICT isn't
> >happening).  
> 
> sorting the waiters by priority? This will be fun. This is only done for
> the rtmutex waiters.
> 

Hmm, perhaps we should use an rbtree to sort simple waiters by
priority :-)

Probably won't make them simple anymore.

-- Steve


* Re: [RFC v0] Use swait in completion
From: Daniel Wagner @ 2016-03-31  6:14 UTC (permalink / raw)
  To: Steven Rostedt, Sebastian Andrzej Siewior
  Cc: Josh Cartwright, linux-kernel, linux-rt-users,
	Peter Zijlstra (Intel),
	Thomas Gleixner, Daniel Wagner

On 03/28/2016 08:57 PM, Steven Rostedt wrote:
> On Wed, 9 Mar 2016 13:24:23 +0100
> Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> 
>> * Josh Cartwright | 2016-03-08 12:26:56 [-0600]:
>>
>>> Is it really just about latency?  Does this deferral not lead to an
>>> inversion in the case where the single woken task isn't the highest
>>> priority waiter on the completion (and doesn't run due to a
>>> middle-priority thing spinning)?  
>>
>> This would be the case, yes. Not only with deferral. Say you have two
>> waiters: the first one is MID-prio and the second is HI-prio. Currently,
>> after the wakeup of the MID-prio waiter you get preempted. Waking all of
>> them at once would put the second waiter first on the CPU.
>> Same thing without the deferral flag.
>>
>>> In order for this to work, it seems like the chosen waiter would need to
>>> inherit the highest priority of all waiters (which AFAICT isn't
>>> happening).  
>>
>> sorting the waiters by priority? This will be fun. This is only done for
>> the rtmutex waiters.
>>
> 
> Hmm, perhaps we should use an rbtree to sort simple waiters by
> priority :-)
> 
> Probably won't make them simple anymore.

I am going through the users of complete_all(). So far I couldn't
identify any user with more than one waiter. That doesn't mean they
don't exist (maybe I overlooked some).

The -rt version of this patch contains a WARN_ON() in complete_all() if
there is more than one waiter. During resume some warnings are printed.
Let's see if more reports show up.

If the common use case is one waiter and we only see several waiters in
slow paths, I would like to avoid adding fairness and increasing the
complexity.

cheers,
daniel

