linux-kernel.vger.kernel.org archive mirror
* [RFC/PATCH 0/5] rt: workqueue PI support -v2
@ 2007-10-23 12:03 Peter Zijlstra
  2007-10-23 12:03 ` [RFC/PATCH 1/5] rt: rename rt_mutex_setprio to task_setprio Peter Zijlstra
                   ` (6 more replies)
  0 siblings, 7 replies; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 12:03 UTC (permalink / raw)
  To: linux-kernel
  Cc: Daniel Walker, Steven Rostedt, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov


Still no more than boot tested...

Oleg, do you have workqueue test modules?

Changes since -v1:
 - proper plist_head_splice() implementation
 - removed the plist_add(, .tail) thing, using prio -1 instead.

(patch against v2.6.23-rt1)

--



* [RFC/PATCH 1/5] rt: rename rt_mutex_setprio to task_setprio
  2007-10-23 12:03 [RFC/PATCH 0/5] rt: workqueue PI support -v2 Peter Zijlstra
@ 2007-10-23 12:03 ` Peter Zijlstra
  2007-10-23 12:03 ` [RFC/PATCH 2/5] rt: list_splice2 Peter Zijlstra
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 12:03 UTC (permalink / raw)
  To: linux-kernel
  Cc: Daniel Walker, Steven Rostedt, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov, Peter Zijlstra

[-- Attachment #1: rt_mutex_setprio.patch --]
[-- Type: text/plain, Size: 2688 bytes --]

With multiple non-mutex users of this function, it's past time it
got renamed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/sched.h     |    7 ++++++-
 kernel/rcupreempt-boost.c |    4 ++--
 kernel/sched.c            |    8 ++------
 3 files changed, 10 insertions(+), 9 deletions(-)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1655,9 +1655,14 @@ extern unsigned int sysctl_sched_compat_
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 
+extern void task_setprio(struct task_struct *p, int prio);
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
+static inline void rt_mutex_setprio(struct task_struct *p, int prio)
+{
+	task_setprio(p, prio);
+}
 extern void rt_mutex_adjust_pi(struct task_struct *p);
 #else
 static inline int rt_mutex_getprio(struct task_struct *p)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -4445,10 +4445,8 @@ long __sched sleep_on_timeout(wait_queue
 }
 EXPORT_SYMBOL(sleep_on_timeout);
 
-#ifdef CONFIG_RT_MUTEXES
-
 /*
- * rt_mutex_setprio - set the current priority of a task
+ * task_setprio - set the current priority of a task
  * @p: task
  * @prio: prio value (kernel-internal form)
  *
@@ -4457,7 +4455,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
  *
  * Used by the rt_mutex code to implement priority inheritance logic.
  */
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void task_setprio(struct task_struct *p, int prio)
 {
 	unsigned long flags;
 	int oldprio, prev_resched, on_rq;
@@ -4522,8 +4520,6 @@ out_unlock:
 	task_rq_unlock(rq, &flags);
 }
 
-#endif
-
 void set_user_nice(struct task_struct *p, long nice)
 {
 	int old_prio, delta, on_rq;
Index: linux-2.6/kernel/rcupreempt-boost.c
===================================================================
--- linux-2.6.orig/kernel/rcupreempt-boost.c
+++ linux-2.6/kernel/rcupreempt-boost.c
@@ -233,7 +233,7 @@ static void rcu_boost_task(struct task_s
 
 	if (task->rcu_prio < task->prio) {
 		rcu_trace_boost_task_boosted(RCU_BOOST_ME);
-		rt_mutex_setprio(task, task->rcu_prio);
+		task_setprio(task, task->rcu_prio);
 	}
 }
 
@@ -325,7 +325,7 @@ void __rcu_preempt_unboost(void)
 
 	spin_lock(&curr->pi_lock);
 	prio = rt_mutex_getprio(curr);
-	rt_mutex_setprio(curr, prio);
+	task_setprio(curr, prio);
 
 	curr->rcub_rbdp = NULL;
 

--



* [RFC/PATCH 2/5] rt: list_splice2
  2007-10-23 12:03 [RFC/PATCH 0/5] rt: workqueue PI support -v2 Peter Zijlstra
  2007-10-23 12:03 ` [RFC/PATCH 1/5] rt: rename rt_mutex_setprio to task_setprio Peter Zijlstra
@ 2007-10-23 12:03 ` Peter Zijlstra
  2007-10-23 14:08   ` Steven Rostedt
  2007-10-23 12:04 ` [RFC/PATCH 3/5] rt: plist_head_splice Peter Zijlstra
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 12:03 UTC (permalink / raw)
  To: linux-kernel
  Cc: Daniel Walker, Steven Rostedt, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov, Peter Zijlstra

[-- Attachment #1: rt-list-mods.patch --]
[-- Type: text/plain, Size: 3930 bytes --]

Introduce list_splice2{,_tail}() which will splice a sub-list denoted
by two list items instead of the full list.
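
As a minimal usage sketch (not part of the patch; the list heads, nodes
and the splice2_example() helper are made up for illustration), splicing
the inclusive sub-list [a, b] off one list and onto the front of another
looks like this:

	#include <linux/list.h>

	static LIST_HEAD(src);
	static LIST_HEAD(dst);
	static struct list_head a, b, c;

	static void splice2_example(void)
	{
		list_add_tail(&a, &src);
		list_add_tail(&b, &src);
		list_add_tail(&c, &src);

		/* Move [a, b] from src to the front of dst; c stays on src. */
		list_splice2(&a, &b, &dst);

		/* list_splice2_tail(&a, &b, &dst) would append them at dst's tail. */
	}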

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 drivers/usb/host/ehci-q.c |    2 -
 include/linux/list.h      |   66 ++++++++++++++++++++++++++++++++++++++++------
 lib/lock_list.c           |    2 -
 3 files changed, 60 insertions(+), 10 deletions(-)

Index: linux-2.6/include/linux/list.h
===================================================================
--- linux-2.6.orig/include/linux/list.h
+++ linux-2.6/include/linux/list.h
@@ -320,17 +320,17 @@ static inline int list_empty_careful(con
 }
 
 static inline void __list_splice(struct list_head *list,
-				 struct list_head *head)
+				 struct list_head *prev,
+				 struct list_head *next)
 {
 	struct list_head *first = list->next;
 	struct list_head *last = list->prev;
-	struct list_head *at = head->next;
 
-	first->prev = head;
-	head->next = first;
+	first->prev = prev;
+	prev->next = first;
 
-	last->next = at;
-	at->prev = last;
+	last->next = next;
+	next->prev = last;
 }
 
 /**
@@ -341,7 +341,13 @@ static inline void __list_splice(struct 
 static inline void list_splice(struct list_head *list, struct list_head *head)
 {
 	if (!list_empty(list))
-		__list_splice(list, head);
+		__list_splice(list, head, head->next);
+}
+
+static inline void list_splice_tail(struct list_head *list, struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head->prev, head);
 }
 
 /**
@@ -355,11 +361,55 @@ static inline void list_splice_init(stru
 				    struct list_head *head)
 {
 	if (!list_empty(list)) {
-		__list_splice(list, head);
+		__list_splice(list, head, head->next);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+static inline void list_splice_tail_init(struct list_head *list,
+					 struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head->prev, head);
 		INIT_LIST_HEAD(list);
 	}
 }
 
+static inline void __list_splice2(struct list_head *first,
+	       			struct list_head *last,
+				struct list_head *prev,
+				struct list_head *next)
+{
+	first->prev->next = last->next;
+	last->next->prev = first->prev;
+
+	first->prev = prev;
+	prev->next = first;
+
+	last->next = next;
+	next->prev = last;
+}
+
+/**
+ * list_splice2 - join [first, last] to head
+ * @first: list item
+ * @last: list item further on the same list
+ * @head: the place to add it on another list
+ */
+static inline void list_splice2(struct list_head *first,
+				struct list_head *last,
+				struct list_head *head)
+{
+	__list_splice2(first, last, head, head->next);
+}
+
+static inline void list_splice2_tail(struct list_head *first,
+				     struct list_head *last,
+				     struct list_head *head)
+{
+	__list_splice2(first, last, head->prev, head);
+}
+
 /**
  * list_splice_init_rcu - splice an RCU-protected list into an existing list.
  * @list:	the RCU-protected list to splice
Index: linux-2.6/drivers/usb/host/ehci-q.c
===================================================================
--- linux-2.6.orig/drivers/usb/host/ehci-q.c
+++ linux-2.6/drivers/usb/host/ehci-q.c
@@ -887,7 +887,7 @@ static struct ehci_qh *qh_append_tds (
 
 			list_del (&qtd->qtd_list);
 			list_add (&dummy->qtd_list, qtd_list);
-			__list_splice (qtd_list, qh->qtd_list.prev);
+			list_splice_tail (qtd_list, &qh->qtd_list);
 
 			ehci_qtd_init(ehci, qtd, qtd->qtd_dma);
 			qh->dummy = qtd;
Index: linux-2.6/lib/lock_list.c
===================================================================
--- linux-2.6.orig/lib/lock_list.c
+++ linux-2.6/lib/lock_list.c
@@ -128,7 +128,7 @@ void lock_list_splice_init(struct lock_l
 	lock = __lock_list_reverse(list);
 	if (!list_empty(&list->head)) {
 		spin_lock_nested(&head->lock, LOCK_LIST_NESTING_NEXT);
-		__list_splice(&list->head, &head->head);
+		__list_splice(&list->head, &head->head, head->head.next);
 		INIT_LIST_HEAD(&list->head);
 		spin_unlock(&head->lock);
 	}

--



* [RFC/PATCH 3/5] rt: plist_head_splice
  2007-10-23 12:03 [RFC/PATCH 0/5] rt: workqueue PI support -v2 Peter Zijlstra
  2007-10-23 12:03 ` [RFC/PATCH 1/5] rt: rename rt_mutex_setprio to task_setprio Peter Zijlstra
  2007-10-23 12:03 ` [RFC/PATCH 2/5] rt: list_splice2 Peter Zijlstra
@ 2007-10-23 12:04 ` Peter Zijlstra
  2007-10-23 15:10   ` Steven Rostedt
  2007-10-23 12:04 ` [RFC/PATCH 4/5] rt: PI-workqueue support Peter Zijlstra
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 12:04 UTC (permalink / raw)
  To: linux-kernel
  Cc: Daniel Walker, Steven Rostedt, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov, Peter Zijlstra

[-- Attachment #1: rt-plist-mods.patch --]
[-- Type: text/plain, Size: 3464 bytes --]

merge-sort two plists together
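
A rough usage sketch (not part of the patch; the heads, nodes and prio
values are made up): plist_head_splice() merges every node of @src into
@dst while keeping the priority order, and leaves @src empty afterwards.

	#include <linux/plist.h>

	static struct plist_head src, dst;
	static struct plist_node a, b;

	static void plist_splice_example(void)
	{
		plist_head_init(&src, NULL);
		plist_head_init(&dst, NULL);

		plist_node_init(&a, 10);	/* kernel prio: lower value = higher prio */
		plist_node_init(&b, 42);

		plist_add(&a, &src);
		plist_add(&b, &dst);

		/* Merge src into dst: dst now iterates a (prio 10) before b (prio 42). */
		plist_head_splice(&src, &dst);
	}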

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/plist.h |    2 +
 lib/plist.c           |   68 ++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 68 insertions(+), 2 deletions(-)

Index: linux-2.6/include/linux/plist.h
===================================================================
--- linux-2.6.orig/include/linux/plist.h
+++ linux-2.6/include/linux/plist.h
@@ -148,6 +148,8 @@ static inline void plist_node_init(struc
 extern void plist_add(struct plist_node *node, struct plist_head *head);
 extern void plist_del(struct plist_node *node, struct plist_head *head);
 
+extern void plist_head_splice(struct plist_head *src, struct plist_head *dst);
+
 /**
  * plist_for_each - iterate over the plist
  * @pos:	the type * to use as a loop counter
Index: linux-2.6/lib/plist.c
===================================================================
--- linux-2.6.orig/lib/plist.c
+++ linux-2.6/lib/plist.c
@@ -66,6 +66,30 @@ static void plist_check_head(struct plis
 # define plist_check_head(h)	do { } while (0)
 #endif
 
+static inline struct plist_node *prev_node(struct plist_node *iter)
+{
+	return list_entry(iter->plist.node_list.prev, struct plist_node,
+			plist.node_list);
+}
+
+static inline struct plist_node *next_node(struct plist_node *iter)
+{
+	return list_entry(iter->plist.node_list.next, struct plist_node,
+			plist.node_list);
+}
+
+static inline struct plist_node *prev_prio(struct plist_node *iter)
+{
+	return list_entry(iter->plist.prio_list.prev, struct plist_node,
+			plist.prio_list);
+}
+
+static inline struct plist_node *next_prio(struct plist_node *iter)
+{
+	return list_entry(iter->plist.prio_list.next, struct plist_node,
+			plist.prio_list);
+}
+
 /**
  * plist_add - add @node to @head
  *
@@ -83,8 +107,7 @@ void plist_add(struct plist_node *node, 
 		if (node->prio < iter->prio)
 			goto lt_prio;
 		else if (node->prio == iter->prio) {
-			iter = list_entry(iter->plist.prio_list.next,
-					struct plist_node, plist.prio_list);
+			iter = next_prio(iter);
 			goto eq_prio;
 		}
 	}
@@ -118,3 +141,44 @@ void plist_del(struct plist_node *node, 
 
 	plist_check_head(head);
 }
+
+void plist_head_splice(struct plist_head *src, struct plist_head *dst)
+{
+	struct plist_node *src_iter_first, *src_iter_last, *dst_iter;
+	struct plist_node *tail = container_of(dst, struct plist_node, plist);
+
+	dst_iter = next_prio(tail);
+
+	while (!plist_head_empty(src) && dst_iter != tail) {
+		src_iter_first = plist_first(src);
+
+		src_iter_last = next_prio(src_iter_first);
+		src_iter_last = prev_node(src_iter_last);
+
+		WARN_ON(src_iter_first->prio != src_iter_last->prio);
+		WARN_ON(list_empty(&src_iter_first->plist.prio_list));
+
+		while (src_iter_first->prio > dst_iter->prio) {
+			dst_iter = next_prio(dst_iter);
+			if (dst_iter == tail)
+				goto tail;
+		}
+
+		list_del_init(&src_iter_first->plist.prio_list);
+
+		if (src_iter_first->prio < dst_iter->prio) {
+			list_add_tail(&src_iter_first->plist.node_list,
+					&dst_iter->plist.node_list);
+		} else if (src_iter_first->prio == dst_iter->prio) {
+			dst_iter = next_prio(dst_iter);
+		} else BUG();
+
+		list_splice2_tail(&src_iter_first->plist.node_list,
+			       	  &src_iter_last->plist.node_list,
+				  &dst_iter->plist.node_list);
+	}
+
+tail:
+	list_splice_tail_init(&src->prio_list, &dst->prio_list);
+	list_splice_tail_init(&src->node_list, &dst->node_list);
+}

--



* [RFC/PATCH 4/5] rt: PI-workqueue support
  2007-10-23 12:03 [RFC/PATCH 0/5] rt: workqueue PI support -v2 Peter Zijlstra
                   ` (2 preceding siblings ...)
  2007-10-23 12:04 ` [RFC/PATCH 3/5] rt: plist_head_splice Peter Zijlstra
@ 2007-10-23 12:04 ` Peter Zijlstra
  2007-10-23 12:04 ` [RFC/PATCH 5/5] rt: PI-workqueue: fix barriers Peter Zijlstra
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 12:04 UTC (permalink / raw)
  To: linux-kernel
  Cc: Daniel Walker, Steven Rostedt, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov, Peter Zijlstra

[-- Attachment #1: rt-workqeue-prio.patch --]
[-- Type: text/plain, Size: 6295 bytes --]

Add support for priority queueing and priority inheritance to the workqueue
infrastructure. This is done by replacing the linear, linked-list worklist with
a priority-sorted plist.

The drawback is that this breaks the workqueue barrier, which is needed to
support flush_workqueue() and wait_on_work().
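
As a rough illustration of the intended effect (not part of the patch;
my_work_fn, my_work and queue_from_rt_task() are hypothetical): a worklet
is now enqueued at the queueing task's normal_prio, and the workqueue
thread is boosted via task_setprio() until the worklet has run.

	#include <linux/workqueue.h>

	static void my_work_fn(struct work_struct *work)
	{
		/* runs with the worker boosted to the queueing task's prio */
	}

	static DECLARE_WORK(my_work, my_work_fn);

	static void queue_from_rt_task(void)
	{
		/* called from e.g. a SCHED_FIFO task */
		schedule_work(&my_work);	/* enqueued at current->normal_prio */
	}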

Signed-off-by: Daniel Walker <dwalker@mvista.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/workqueue.h |    7 ++++---
 kernel/power/poweroff.c   |    1 +
 kernel/workqueue.c        |   40 +++++++++++++++++++++++++---------------
 3 files changed, 30 insertions(+), 18 deletions(-)

Index: linux-2.6/include/linux/workqueue.h
===================================================================
--- linux-2.6.orig/include/linux/workqueue.h
+++ linux-2.6/include/linux/workqueue.h
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <linux/plist.h>
 #include <asm/atomic.h>
 
 struct workqueue_struct;
@@ -26,7 +27,7 @@ struct work_struct {
 #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
 #define WORK_STRUCT_FLAG_MASK (3UL)
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
-	struct list_head entry;
+	struct plist_node entry;
 	work_func_t func;
 };
 
@@ -43,7 +44,7 @@ struct execute_work {
 
 #define __WORK_INITIALIZER(n, f) {				\
 	.data = WORK_DATA_INIT(),				\
-	.entry	= { &(n).entry, &(n).entry },			\
+	.entry	= PLIST_NODE_INIT(n.entry, MAX_PRIO),		\
 	.func = (f),						\
 	}
 
@@ -79,7 +80,7 @@ struct execute_work {
 #define INIT_WORK(_work, _func)						\
 	do {								\
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
-		INIT_LIST_HEAD(&(_work)->entry);			\
+		plist_node_init(&(_work)->entry, -1);			\
 		PREPARE_WORK((_work), (_func));				\
 	} while (0)
 
Index: linux-2.6/kernel/power/poweroff.c
===================================================================
--- linux-2.6.orig/kernel/power/poweroff.c
+++ linux-2.6/kernel/power/poweroff.c
@@ -8,6 +8,7 @@
 #include <linux/sysrq.h>
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/reboot.h>
 
Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -44,7 +44,7 @@ struct cpu_workqueue_struct {
 
 	spinlock_t lock;
 
-	struct list_head worklist;
+	struct plist_head worklist;
 	wait_queue_head_t more_work;
 	struct work_struct *current_work;
 
@@ -127,16 +127,19 @@ struct cpu_workqueue_struct *get_wq_data
 static void insert_work(struct cpu_workqueue_struct *cwq,
 				struct work_struct *work, int tail)
 {
+	int prio = current->normal_prio;
+
 	set_wq_data(work, cwq);
 	/*
 	 * Ensure that we get the right work->data if we see the
 	 * result of list_add() below, see try_to_grab_pending().
 	 */
 	smp_wmb();
-	if (tail)
-		list_add_tail(&work->entry, &cwq->worklist);
-	else
-		list_add(&work->entry, &cwq->worklist);
+	plist_node_init(&work->entry, prio);
+	plist_add(&work->entry, &cwq->worklist);
+
+	if (prio < cwq->thread->prio)
+		task_setprio(cwq->thread, prio);
 	wake_up(&cwq->more_work);
 }
 
@@ -168,7 +171,7 @@ int fastcall queue_work(struct workqueue
 	int ret = 0, cpu = raw_smp_processor_id();
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
-		BUG_ON(!list_empty(&work->entry));
+		BUG_ON(!plist_node_empty(&work->entry));
 		__queue_work(wq_per_cpu(wq, cpu), work);
 		ret = 1;
 	}
@@ -222,7 +225,7 @@ int queue_delayed_work_on(int cpu, struc
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
 		BUG_ON(timer_pending(timer));
-		BUG_ON(!list_empty(&work->entry));
+		BUG_ON(!plist_node_empty(&work->entry));
 
 		/* This stores cwq for the moment, for the timer_fn */
 		set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
@@ -264,13 +267,17 @@ static void run_workqueue(struct cpu_wor
 			__FUNCTION__, cwq->run_depth);
 		dump_stack();
 	}
-	while (!list_empty(&cwq->worklist)) {
-		struct work_struct *work = list_entry(cwq->worklist.next,
+	while (!plist_head_empty(&cwq->worklist)) {
+		struct work_struct *work = plist_first_entry(&cwq->worklist,
 						struct work_struct, entry);
 		work_func_t f = work->func;
 
+		if (likely(cwq->thread->prio != work->entry.prio))
+			task_setprio(cwq->thread, work->entry.prio);
+
 		cwq->current_work = work;
-		list_del_init(cwq->worklist.next);
+		plist_del(&work->entry, &cwq->worklist);
+		plist_node_init(&work->entry, MAX_PRIO);
 		spin_unlock_irq(&cwq->lock);
 
 		BUG_ON(get_wq_data(work) != cwq);
@@ -283,6 +290,7 @@ static void run_workqueue(struct cpu_wor
 		spin_lock_irq(&cwq->lock);
 		cwq->current_work = NULL;
 	}
+	task_setprio(cwq->thread, current->normal_prio);
 	cwq->run_depth--;
 	spin_unlock_irq(&cwq->lock);
 }
@@ -301,7 +309,7 @@ static int worker_thread(void *__cwq)
 		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
 		if (!freezing(current) &&
 		    !kthread_should_stop() &&
-		    list_empty(&cwq->worklist))
+		    plist_head_empty(&cwq->worklist))
 			schedule();
 		finish_wait(&cwq->more_work, &wait);
 
@@ -354,7 +362,8 @@ static int flush_cpu_workqueue(struct cp
 
 		active = 0;
 		spin_lock_irq(&cwq->lock);
-		if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
+		if (!plist_head_empty(&cwq->worklist) ||
+			cwq->current_work != NULL) {
 			insert_wq_barrier(cwq, &barr, 1);
 			active = 1;
 		}
@@ -413,7 +422,7 @@ static int try_to_grab_pending(struct wo
 		return ret;
 
 	spin_lock_irq(&cwq->lock);
-	if (!list_empty(&work->entry)) {
+	if (!plist_node_empty(&work->entry)) {
 		/*
 		 * This work is queued, but perhaps we locked the wrong cwq.
 		 * In that case we must see the new value after rmb(), see
@@ -421,7 +430,8 @@ static int try_to_grab_pending(struct wo
 		 */
 		smp_rmb();
 		if (cwq == get_wq_data(work)) {
-			list_del_init(&work->entry);
+			plist_del(&work->entry, &cwq->worklist);
+			plist_node_init(&work->entry, MAX_PRIO);
 			ret = 1;
 		}
 	}
@@ -747,7 +757,7 @@ init_cpu_workqueue(struct workqueue_stru
 
 	cwq->wq = wq;
 	spin_lock_init(&cwq->lock);
-	INIT_LIST_HEAD(&cwq->worklist);
+	plist_head_init(&cwq->worklist, NULL);
 	init_waitqueue_head(&cwq->more_work);
 
 	return cwq;

--



* [RFC/PATCH 5/5] rt: PI-workqueue: fix barriers
  2007-10-23 12:03 [RFC/PATCH 0/5] rt: workqueue PI support -v2 Peter Zijlstra
                   ` (3 preceding siblings ...)
  2007-10-23 12:04 ` [RFC/PATCH 4/5] rt: PI-workqueue support Peter Zijlstra
@ 2007-10-23 12:04 ` Peter Zijlstra
  2007-10-23 19:22 ` [RFC/PATCH 6/5] rt: PI-workqueue: wait_on_work() fixup Peter Zijlstra
  2007-10-23 19:22 ` [RFC/PATCH 7/5] rt: PI-workqueue: propagate prio for delayed work Peter Zijlstra
  6 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 12:04 UTC (permalink / raw)
  To: linux-kernel
  Cc: Daniel Walker, Steven Rostedt, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov, Peter Zijlstra

[-- Attachment #1: rt-workqueue-barrier.patch --]
[-- Type: text/plain, Size: 8091 bytes --]

The plist change to the workqueues left the barrier functionality broken.

The barrier is used for two things:
 - wait_on_work(), and
 - flush_cpu_workqueue().

wait_on_work() - uses the barrier to wait for the completion of the currently
executing worklet. This was done by inserting a completion barrier at the very
head of the worklist; with a plist this means the head of the highest prio.

In order to do that, we extend the priority range to exceed the normal range
and enqueue the barrier higher than anything else. Another noteworthy point is
that this high-prio worklet must not boost the worker's prio further than the
waiting task's prio, even though we enqueue it at prio 100.

flush_cpu_workqueue() - is a full ordering barrier, although, as the name
suggests, it is usually used to wait for the worklist to drain. We'll support
the full ordering semantics currently present. This means that:

  W10, W22, W65, B, W80, B, W99

[ where Wn is a worklet at prio n, and B a barrier ]

would most likely execute in the following order:

  W10@99, W65@99, W22@99, W80@99, W99

[ Wn@m is Wn executed at prio m ]
[ W10 would be first because it can start executing while the others 
  are being added ]

Whereas without the barriers it would be:

  W10@99, W99, W80, W65, W22

The prio ordering of the plist makes it hard to impose an extra order on top.
The solution used is to nest plist structures. The example will look like:

  W10, B(B(W65, W22), W80), W99

That is, each barrier splices the then-pending worklist into itself and
enqueues itself as the next item to run (very first item, highest prio); the
second barrier above therefore swallows both the first barrier and W80. The
barrier then runs its own plist to completion before 'popping' back to the
enclosing worklist.

To avoid callstack nesting, run_workqueue() is taught about this barrier stack.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/workqueue.c |  111 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 95 insertions(+), 16 deletions(-)

Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -36,6 +36,8 @@
 
 #include <asm/uaccess.h>
 
+struct wq_full_barrier;
+
 /*
  * The per-CPU workqueue (if single thread, we always use the first
  * possible cpu).
@@ -52,6 +54,8 @@ struct cpu_workqueue_struct {
 	struct task_struct *thread;
 
 	int run_depth;		/* Detect run_workqueue() recursion depth */
+
+	struct wq_full_barrier *barrier;
 } ____cacheline_aligned;
 
 /*
@@ -125,10 +129,8 @@ struct cpu_workqueue_struct *get_wq_data
 }
 
 static void insert_work(struct cpu_workqueue_struct *cwq,
-				struct work_struct *work, int tail)
+		struct work_struct *work, int prio, int boost_prio)
 {
-	int prio = current->normal_prio;
-
 	set_wq_data(work, cwq);
 	/*
 	 * Ensure that we get the right work->data if we see the
@@ -138,8 +140,8 @@ static void insert_work(struct cpu_workq
 	plist_node_init(&work->entry, prio);
 	plist_add(&work->entry, &cwq->worklist);
 
-	if (prio < cwq->thread->prio)
-		task_setprio(cwq->thread, prio);
+	if (boost_prio < cwq->thread->prio)
+		task_setprio(cwq->thread, boost_prio);
 	wake_up(&cwq->more_work);
 }
 
@@ -150,7 +152,7 @@ static void __queue_work(struct cpu_work
 	unsigned long flags;
 
 	spin_lock_irqsave(&cwq->lock, flags);
-	insert_work(cwq, work, 1);
+	insert_work(cwq, work, current->normal_prio, current->normal_prio);
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -257,8 +259,20 @@ static void leak_check(void *func)
 	dump_stack();
 }
 
+struct wq_full_barrier {
+	struct work_struct		work;
+	struct plist_head		worklist;
+	struct wq_full_barrier 		*prev_barrier;
+	int				prev_prio;
+	int				waiter_prio;
+	struct cpu_workqueue_struct 	*cwq;
+	struct completion		done;
+};
+
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
+	struct plist_head *worklist = &cwq->worklist;
+
 	spin_lock_irq(&cwq->lock);
 	cwq->run_depth++;
 	if (cwq->run_depth > 3) {
@@ -267,16 +281,27 @@ static void run_workqueue(struct cpu_wor
 			__FUNCTION__, cwq->run_depth);
 		dump_stack();
 	}
-	while (!plist_head_empty(&cwq->worklist)) {
-		struct work_struct *work = plist_first_entry(&cwq->worklist,
+
+again:
+	while (!plist_head_empty(worklist)) {
+		int prio;
+		struct work_struct *work = plist_first_entry(worklist,
 						struct work_struct, entry);
 		work_func_t f = work->func;
 
-		if (likely(cwq->thread->prio != work->entry.prio))
-			task_setprio(cwq->thread, work->entry.prio);
+		prio = work->entry.prio;
+		if (unlikely(worklist != &cwq->worklist)) {
+			prio = min(prio, cwq->barrier->prev_prio);
+			prio = min(prio, cwq->barrier->waiter_prio);
+			prio = min(prio, plist_first(&cwq->worklist)->prio);
+		}
+		prio = max(prio, 0);
+
+		if (likely(cwq->thread->prio != prio))
+			task_setprio(cwq->thread, prio);
 
 		cwq->current_work = work;
-		plist_del(&work->entry, &cwq->worklist);
+		plist_del(&work->entry, worklist);
 		plist_node_init(&work->entry, MAX_PRIO);
 		spin_unlock_irq(&cwq->lock);
 
@@ -289,7 +314,27 @@ static void run_workqueue(struct cpu_wor
 
 		spin_lock_irq(&cwq->lock);
 		cwq->current_work = NULL;
+
+		if (unlikely(cwq->barrier))
+			worklist = &cwq->barrier->worklist;
+	}
+
+	if (unlikely(worklist != &cwq->worklist)) {
+		struct wq_full_barrier *barrier = cwq->barrier;
+
+		BUG_ON(!barrier);
+		cwq->barrier = barrier->prev_barrier;
+		complete(&barrier->done);
+
+		if (unlikely(cwq->barrier))
+			worklist = &cwq->barrier->worklist;
+		else
+			worklist = &cwq->worklist;
+
+		if (!plist_head_empty(worklist))
+			goto again;
 	}
+
 	task_setprio(cwq->thread, current->normal_prio);
 	cwq->run_depth--;
 	spin_unlock_irq(&cwq->lock);
@@ -336,14 +381,47 @@ static void wq_barrier_func(struct work_
 }
 
 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
-					struct wq_barrier *barr, int tail)
+					struct wq_barrier *barr, int prio)
 {
 	INIT_WORK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
 
 	init_completion(&barr->done);
 
-	insert_work(cwq, &barr->work, tail);
+	insert_work(cwq, &barr->work, prio, current->prio);
+}
+
+static void wq_full_barrier_func(struct work_struct *work)
+{
+	struct wq_full_barrier *barrier =
+		container_of(work, struct wq_full_barrier, work);
+	struct cpu_workqueue_struct *cwq = barrier->cwq;
+	int prio = MAX_PRIO;
+
+	spin_lock_irq(&cwq->lock);
+	barrier->prev_barrier = cwq->barrier;
+	if (cwq->barrier) {
+		prio = min(prio, cwq->barrier->waiter_prio);
+		prio = min(prio, plist_first(&cwq->barrier->worklist)->prio);
+	}
+	barrier->prev_prio = prio;
+	cwq->barrier = barrier;
+	spin_unlock_irq(&cwq->lock);
+}
+
+static void insert_wq_full_barrier(struct cpu_workqueue_struct *cwq,
+		struct wq_full_barrier *barr)
+{
+	INIT_WORK(&barr->work, wq_full_barrier_func);
+	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
+
+	plist_head_init(&barr->worklist, NULL);
+	plist_head_splice(&cwq->worklist, &barr->worklist);
+	barr->cwq = cwq;
+	init_completion(&barr->done);
+	barr->waiter_prio = current->prio;
+
+	insert_work(cwq, &barr->work, 0, current->prio);
 }
 
 static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -358,13 +436,13 @@ static int flush_cpu_workqueue(struct cp
 		run_workqueue(cwq);
 		active = 1;
 	} else {
-		struct wq_barrier barr;
+		struct wq_full_barrier barr;
 
 		active = 0;
 		spin_lock_irq(&cwq->lock);
 		if (!plist_head_empty(&cwq->worklist) ||
 			cwq->current_work != NULL) {
-			insert_wq_barrier(cwq, &barr, 1);
+			insert_wq_full_barrier(cwq, &barr);
 			active = 1;
 		}
 		spin_unlock_irq(&cwq->lock);
@@ -448,7 +526,7 @@ static void wait_on_cpu_work(struct cpu_
 
 	spin_lock_irq(&cwq->lock);
 	if (unlikely(cwq->current_work == work)) {
-		insert_wq_barrier(cwq, &barr, 0);
+		insert_wq_barrier(cwq, &barr, -1);
 		running = 1;
 	}
 	spin_unlock_irq(&cwq->lock);
@@ -759,6 +837,7 @@ init_cpu_workqueue(struct workqueue_stru
 	spin_lock_init(&cwq->lock);
 	plist_head_init(&cwq->worklist, NULL);
 	init_waitqueue_head(&cwq->more_work);
+	cwq->barrier = NULL;
 
 	return cwq;
 }

--



* Re: [RFC/PATCH 2/5] rt: list_splice2
  2007-10-23 12:03 ` [RFC/PATCH 2/5] rt: list_splice2 Peter Zijlstra
@ 2007-10-23 14:08   ` Steven Rostedt
  0 siblings, 0 replies; 12+ messages in thread
From: Steven Rostedt @ 2007-10-23 14:08 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Daniel Walker, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov



--
On Tue, 23 Oct 2007, Peter Zijlstra wrote:
>
>  /**
> @@ -341,7 +341,13 @@ static inline void __list_splice(struct
>  static inline void list_splice(struct list_head *list, struct list_head *head)
>  {
>  	if (!list_empty(list))
> -		__list_splice(list, head);
> +		__list_splice(list, head, head->next);
> +}
> +


> Index: linux-2.6/lib/lock_list.c
> ===================================================================
> --- linux-2.6.orig/lib/lock_list.c
> +++ linux-2.6/lib/lock_list.c
> @@ -128,7 +128,7 @@ void lock_list_splice_init(struct lock_l
>  	lock = __lock_list_reverse(list);
>  	if (!list_empty(&list->head)) {
>  		spin_lock_nested(&head->lock, LOCK_LIST_NESTING_NEXT);
> -		__list_splice(&list->head, &head->head);
> +		__list_splice(&list->head, &head->head, head->head.next);

Can't this just now be list_splice and not __list_splice?

Yes it tests for list_empty again, but that should (hopefully) be
optimized out.

-- Steve

>  		INIT_LIST_HEAD(&list->head);
>  		spin_unlock(&head->lock);
>  	}
>
> --
>
>
>


* Re: [RFC/PATCH 3/5] rt: plist_head_splice
  2007-10-23 12:04 ` [RFC/PATCH 3/5] rt: plist_head_splice Peter Zijlstra
@ 2007-10-23 15:10   ` Steven Rostedt
  2007-10-23 16:26     ` Peter Zijlstra
  2007-10-23 17:45     ` Peter Zijlstra
  0 siblings, 2 replies; 12+ messages in thread
From: Steven Rostedt @ 2007-10-23 15:10 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Daniel Walker, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov


--
On Tue, 23 Oct 2007, Peter Zijlstra wrote:
> +
> +void plist_head_splice(struct plist_head *src, struct plist_head *dst)
> +{
> +	struct plist_node *src_iter_first, *src_iter_last, *dst_iter;
> +	struct plist_node *tail = container_of(dst, struct plist_node, plist);
> +
> +	dst_iter = next_prio(tail);
> +
> +	while (!plist_head_empty(src) && dst_iter != tail) {
> +		src_iter_first = plist_first(src);
> +
> +		src_iter_last = next_prio(src_iter_first);
> +		src_iter_last = prev_node(src_iter_last);
> +
> +		WARN_ON(src_iter_first->prio != src_iter_last->prio);
> +		WARN_ON(list_empty(&src_iter_first->plist.prio_list));
> +
> +		while (src_iter_first->prio > dst_iter->prio) {
> +			dst_iter = next_prio(dst_iter);
> +			if (dst_iter == tail)
> +				goto tail;
> +		}
> +
> +		list_del_init(&src_iter_first->plist.prio_list);
> +
> +		if (src_iter_first->prio < dst_iter->prio) {

I may be confused here, but shouldn't we be linking the
src_iter_first->prio_list somewhere here? Don't all the different prios need
to be on their own separate prio_list? Otherwise two splices in a row can cause
the above WARN_ON (prio != prio).

-- Steve


> +			list_add_tail(&src_iter_first->plist.node_list,
> +					&dst_iter->plist.node_list);
> +		} else if (src_iter_first->prio == dst_iter->prio) {
> +			dst_iter = next_prio(dst_iter);
> +		} else BUG();
> +
> +		list_splice2_tail(&src_iter_first->plist.node_list,
> +			       	  &src_iter_last->plist.node_list,
> +				  &dst_iter->plist.node_list);
> +	}
> +
> +tail:
> +	list_splice_tail_init(&src->prio_list, &dst->prio_list);
> +	list_splice_tail_init(&src->node_list, &dst->node_list);
> +}
>
> --
>
>
>


* Re: [RFC/PATCH 3/5] rt: plist_head_splice
  2007-10-23 15:10   ` Steven Rostedt
@ 2007-10-23 16:26     ` Peter Zijlstra
  2007-10-23 17:45     ` Peter Zijlstra
  1 sibling, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 16:26 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Daniel Walker, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov


On Tue, 2007-10-23 at 11:10 -0400, Steven Rostedt wrote:
> --
> On Tue, 23 Oct 2007, Peter Zijlstra wrote:
> > +
> > +void plist_head_splice(struct plist_head *src, struct plist_head *dst)
> > +{
> > +	struct plist_node *src_iter_first, *src_iter_last, *dst_iter;
> > +	struct plist_node *tail = container_of(dst, struct plist_node, plist);
> > +
> > +	dst_iter = next_prio(tail);
> > +
> > +	while (!plist_head_empty(src) && dst_iter != tail) {
> > +		src_iter_first = plist_first(src);
> > +
> > +		src_iter_last = next_prio(src_iter_first);
> > +		src_iter_last = prev_node(src_iter_last);
> > +
> > +		WARN_ON(src_iter_first->prio != src_iter_last->prio);
> > +		WARN_ON(list_empty(&src_iter_first->plist.prio_list));
> > +
> > +		while (src_iter_first->prio > dst_iter->prio) {
> > +			dst_iter = next_prio(dst_iter);
> > +			if (dst_iter == tail)
> > +				goto tail;
> > +		}
> > +
> > +		list_del_init(&src_iter_first->plist.prio_list);
> > +
> > +		if (src_iter_first->prio < dst_iter->prio) {
> 
> I may be confused here, but shouldn't we be linking the
> src_iter_first->prio_list somewhere here? Don't all the different prios need
> to be on their own separate prio_list? Otherwise two splices in a row can cause
> the above WARN_ON (prio != prio).
> 

> > +			list_add_tail(&src_iter_first->plist.node_list,
> > +					&dst_iter->plist.node_list);

Uhm, yeah, that was supposed to be prio_list indeed.

Thanks!

> > +		} else if (src_iter_first->prio == dst_iter->prio) {
> > +			dst_iter = next_prio(dst_iter);
> > +		} else BUG();
> > +
> > +		list_splice2_tail(&src_iter_first->plist.node_list,
> > +			       	  &src_iter_last->plist.node_list,
> > +				  &dst_iter->plist.node_list);
> > +	}
> > +
> > +tail:
> > +	list_splice_tail_init(&src->prio_list, &dst->prio_list);
> > +	list_splice_tail_init(&src->node_list, &dst->node_list);
> > +}
> >
> > --
> >
> >
> >



* Re: [RFC/PATCH 3/5] rt: plist_head_splice
  2007-10-23 15:10   ` Steven Rostedt
  2007-10-23 16:26     ` Peter Zijlstra
@ 2007-10-23 17:45     ` Peter Zijlstra
  1 sibling, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 17:45 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Daniel Walker, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov

Index: linux-2.6/lib/plist.c
===================================================================
--- linux-2.6.orig/lib/plist.c
+++ linux-2.6/lib/plist.c
@@ -167,8 +167,8 @@ void plist_head_splice(struct plist_head
 		list_del_init(&src_iter_first->plist.prio_list);
 
 		if (src_iter_first->prio < dst_iter->prio) {
-			list_add_tail(&src_iter_first->plist.node_list,
-					&dst_iter->plist.node_list);
+			list_add_tail(&src_iter_first->plist.prio_list,
+					&dst_iter->plist.prio_list);
 		} else if (src_iter_first->prio == dst_iter->prio) {
 			dst_iter = next_prio(dst_iter);
 		} else BUG();




* [RFC/PATCH 6/5] rt: PI-workqueue: wait_on_work() fixup
  2007-10-23 12:03 [RFC/PATCH 0/5] rt: workqueue PI support -v2 Peter Zijlstra
                   ` (4 preceding siblings ...)
  2007-10-23 12:04 ` [RFC/PATCH 5/5] rt: PI-workqueue: fix barriers Peter Zijlstra
@ 2007-10-23 19:22 ` Peter Zijlstra
  2007-10-23 19:22 ` [RFC/PATCH 7/5] rt: PI-workqueue: propagate prio for delayed work Peter Zijlstra
  6 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 19:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: Daniel Walker, Steven Rostedt, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov

Subject: rt: PI-workqueue: wait_on_work() fixup

Oleg noticed that the new wait_on_work() barrier does not properly interact
with the nesting barrier.

The problem is that a wait_on_work() targeted at a worklet in a nested list
will complete too late.

Fix this by using a wait_queue instead.

[ will be folded into the previous patch on next posting ]
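
For reference, a minimal sketch of the generic wait_queue pattern the fix
switches to (the names and the done flag below are illustrative, not the
ones used in the patch); the fix applies this with a per-cwq work_done
waitqueue that run_workqueue() wakes after finishing a worklet:

	#include <linux/wait.h>

	static DECLARE_WAIT_QUEUE_HEAD(done_wq);
	static int done;

	static void waiter(void)
	{
		wait_event(done_wq, done);	/* sleeps until the condition holds */
	}

	static void completer(void)
	{
		done = 1;
		wake_up_all(&done_wq);		/* waiters wake and re-check the condition */
	}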

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/workqueue.c |   74 ++++++++++++++++++++---------------------------------
 1 file changed, 29 insertions(+), 45 deletions(-)

Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -33,10 +33,11 @@
 #include <linux/freezer.h>
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
+#include <linux/wait.h>
 
 #include <asm/uaccess.h>
 
-struct wq_full_barrier;
+struct wq_barrier;
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -55,7 +56,8 @@ struct cpu_workqueue_struct {
 
 	int run_depth;		/* Detect run_workqueue() recursion depth */
 
-	struct wq_full_barrier *barrier;
+	wait_queue_head_t work_done;
+	struct wq_barrier *barrier;
 } ____cacheline_aligned;
 
 /*
@@ -259,10 +261,10 @@ static void leak_check(void *func)
 	dump_stack();
 }
 
-struct wq_full_barrier {
+struct wq_barrier {
 	struct work_struct		work;
 	struct plist_head		worklist;
-	struct wq_full_barrier 		*prev_barrier;
+	struct wq_barrier 		*prev_barrier;
 	int				prev_prio;
 	int				waiter_prio;
 	struct cpu_workqueue_struct 	*cwq;
@@ -314,13 +316,13 @@ again:
 
 		spin_lock_irq(&cwq->lock);
 		cwq->current_work = NULL;
-
+		wake_up_all(&cwq->work_done);
 		if (unlikely(cwq->barrier))
 			worklist = &cwq->barrier->worklist;
 	}
 
 	if (unlikely(worklist != &cwq->worklist)) {
-		struct wq_full_barrier *barrier = cwq->barrier;
+		struct wq_barrier *barrier = cwq->barrier;
 
 		BUG_ON(!barrier);
 		cwq->barrier = barrier->prev_barrier;
@@ -369,32 +371,10 @@ static int worker_thread(void *__cwq)
 	return 0;
 }
 
-struct wq_barrier {
-	struct work_struct	work;
-	struct completion	done;
-};
-
 static void wq_barrier_func(struct work_struct *work)
 {
-	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
-	complete(&barr->done);
-}
-
-static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
-					struct wq_barrier *barr, int prio)
-{
-	INIT_WORK(&barr->work, wq_barrier_func);
-	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
-
-	init_completion(&barr->done);
-
-	insert_work(cwq, &barr->work, prio, current->prio);
-}
-
-static void wq_full_barrier_func(struct work_struct *work)
-{
-	struct wq_full_barrier *barrier =
-		container_of(work, struct wq_full_barrier, work);
+	struct wq_barrier *barrier =
+		container_of(work, struct wq_barrier, work);
 	struct cpu_workqueue_struct *cwq = barrier->cwq;
 	int prio = MAX_PRIO;
 
@@ -409,10 +389,10 @@ static void wq_full_barrier_func(struct 
 	spin_unlock_irq(&cwq->lock);
 }
 
-static void insert_wq_full_barrier(struct cpu_workqueue_struct *cwq,
-		struct wq_full_barrier *barr)
+static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
+		struct wq_barrier *barr)
 {
-	INIT_WORK(&barr->work, wq_full_barrier_func);
+	INIT_WORK(&barr->work, wq_barrier_func);
 	__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
 
 	plist_head_init(&barr->worklist, NULL);
@@ -436,13 +416,13 @@ static int flush_cpu_workqueue(struct cp
 		run_workqueue(cwq);
 		active = 1;
 	} else {
-		struct wq_full_barrier barr;
+		struct wq_barrier barr;
 
 		active = 0;
 		spin_lock_irq(&cwq->lock);
 		if (!plist_head_empty(&cwq->worklist) ||
 			cwq->current_work != NULL) {
-			insert_wq_full_barrier(cwq, &barr);
+			insert_wq_barrier(cwq, &barr);
 			active = 1;
 		}
 		spin_unlock_irq(&cwq->lock);
@@ -518,21 +498,24 @@ static int try_to_grab_pending(struct wo
 	return ret;
 }
 
-static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
-				struct work_struct *work)
+static inline
+int is_current_work(struct cpu_workqueue_struct *cwq, struct work_struct *work)
 {
-	struct wq_barrier barr;
-	int running = 0;
+	int ret;
 
 	spin_lock_irq(&cwq->lock);
-	if (unlikely(cwq->current_work == work)) {
-		insert_wq_barrier(cwq, &barr, -1);
-		running = 1;
-	}
+	ret = (cwq->current_work == work);
 	spin_unlock_irq(&cwq->lock);
 
-	if (unlikely(running))
-		wait_for_completion(&barr.done);
+	return ret;
+}
+
+static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
+				struct work_struct *work)
+{
+	DEFINE_WAIT(wait);
+
+	wait_event(cwq->work_done, is_current_work(cwq, work));
 }
 
 static void wait_on_work(struct work_struct *work)
@@ -838,6 +821,7 @@ init_cpu_workqueue(struct workqueue_stru
 	plist_head_init(&cwq->worklist, NULL);
 	init_waitqueue_head(&cwq->more_work);
 	cwq->barrier = NULL;
+	init_waitqueue_head(&cwq->work_done);
 
 	return cwq;
 }




* [RFC/PATCH 7/5] rt: PI-workqueue: propagate prio for delayed work
  2007-10-23 12:03 [RFC/PATCH 0/5] rt: workqueue PI support -v2 Peter Zijlstra
                   ` (5 preceding siblings ...)
  2007-10-23 19:22 ` [RFC/PATCH 6/5] rt: PI-workqueue: wait_on_work() fixup Peter Zijlstra
@ 2007-10-23 19:22 ` Peter Zijlstra
  6 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2007-10-23 19:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: Daniel Walker, Steven Rostedt, Ingo Molnar, Thomas Gleixner,
	Gregory Haskins, Oleg Nesterov

Subject: rt: PI-workqueue: propagate prio for delayed work

Delayed work loses its enqueue priority and ends up being enqueued at the prio
of the softirq thread. Amend this.
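
A rough usage sketch (not part of the patch; dwork_fn, my_dwork and the
one-second delay are made up): the prio is now sampled when the delayed
work is queued and re-applied when the timer fires, instead of taking the
softirq thread's prio.

	#include <linux/workqueue.h>

	static void dwork_fn(struct work_struct *work)
	{
		/* runs at the prio of the task that queued it, not the softirq's */
	}

	static DECLARE_DELAYED_WORK(my_dwork, dwork_fn);

	static void queue_it(void)
	{
		/* dwork->prio = current->normal_prio is recorded here ... */
		schedule_delayed_work(&my_dwork, HZ);
		/* ... and used by delayed_work_timer_fn() when the timer fires */
	}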

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/workqueue.h |    1 +
 kernel/workqueue.c        |   16 ++++++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

Index: linux-2.6/include/linux/workqueue.h
===================================================================
--- linux-2.6.orig/include/linux/workqueue.h
+++ linux-2.6/include/linux/workqueue.h
@@ -36,6 +36,7 @@ struct work_struct {
 struct delayed_work {
 	struct work_struct work;
 	struct timer_list timer;
+	int prio;
 };
 
 struct execute_work {
Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -149,12 +149,12 @@ static void insert_work(struct cpu_workq
 
 /* Preempt must be disabled. */
 static void __queue_work(struct cpu_workqueue_struct *cwq,
-			 struct work_struct *work)
+			 struct work_struct *work, int prio)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&cwq->lock, flags);
-	insert_work(cwq, work, current->normal_prio, current->normal_prio);
+	insert_work(cwq, work, prio, prio);
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
@@ -176,7 +176,7 @@ int fastcall queue_work(struct workqueue
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
 		BUG_ON(!plist_node_empty(&work->entry));
-		__queue_work(wq_per_cpu(wq, cpu), work);
+		__queue_work(wq_per_cpu(wq, cpu), work, current->normal_prio);
 		ret = 1;
 	}
 	return ret;
@@ -189,7 +189,8 @@ void delayed_work_timer_fn(unsigned long
 	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
 	struct workqueue_struct *wq = cwq->wq;
 
-	__queue_work(wq_per_cpu(wq, raw_smp_processor_id()), &dwork->work);
+	__queue_work(wq_per_cpu(wq, raw_smp_processor_id()),
+			&dwork->work, dwork->prio);
 }
 
 /**
@@ -232,6 +233,7 @@ int queue_delayed_work_on(int cpu, struc
 		BUG_ON(!plist_node_empty(&work->entry));
 
 		/* This stores cwq for the moment, for the timer_fn */
+		dwork->prio = current->normal_prio;
 		set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id()));
 		timer->expires = jiffies + delay;
 		timer->data = (unsigned long)dwork;
@@ -702,7 +704,8 @@ int schedule_on_each_cpu(void (*func)(vo
 		work->info = info;
 		INIT_WORK(&work->work, schedule_on_each_cpu_func);
 		set_bit(WORK_STRUCT_PENDING, work_data_bits(&work->work));
-		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), &work->work);
+		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
+				&work->work, current->normal_prio);
 	}
 	unlock_cpu_hotplug();
 
@@ -749,7 +752,8 @@ int schedule_on_each_cpu_wq(struct workq
 
 		INIT_WORK(work, func);
 		set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
-		__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+		__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work,
+				current->normal_prio);
 	}
 	flush_workqueue(wq);
 	free_percpu(works);



