* [patch 1/3] ipc/sem: fix -rt livelock
       [not found] <1379051751.5455.112.camel@marge.simpson.net>
@ 2013-09-13  6:12 ` Mike Galbraith
  2013-09-14 12:24   ` Manfred Spraul
  2013-09-14 21:46   ` Manfred Spraul
  2013-09-13  6:12 ` [patch 2/3] ipc/sem: revert ipc/sem: Rework semaphore wakeups Mike Galbraith
  2013-09-13  6:12 ` [patch 3/3] ipc/sem: Add -RT compatible wakeup scheme Mike Galbraith
  2 siblings, 2 replies; 15+ messages in thread
From: Mike Galbraith @ 2013-09-13  6:12 UTC (permalink / raw)
  To: linux-rt-users
  Cc: Manfred Spraul, Steven Rostedt, Thomas Gleixner,
	Sebastian Andrzej Siewior, Peter Zijlstra

The goto-again loop can and does induce livelock in -rt.  Remove it.

spin_unlock_wait(lock) in -rt kernels takes/releases the lock in question, so
all it takes to create a self-perpetuating loop is for one task to start the
ball rolling by taking the array lock; other tasks see this, and react by
taking/releasing/retrying endlessly.
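
For illustration, a sketch of what this means (a simplification only;
rt_spin_unlock_wait() here is illustrative, not the actual rtmutex code):

	/*
	 * Sketch: on -rt, spin_unlock_wait() amounts to a take/release
	 * cycle, so every waiter briefly *owns* the lock it is merely
	 * waiting on.
	 */
	static inline void rt_spin_unlock_wait(spinlock_t *lock)
	{
		spin_lock(lock);	/* sleep/boost until the holder lets go */
		spin_unlock(lock);	/* we only wanted to wait, give it back */
	}

Each waiter's take/release makes the lock look freshly taken to every other
spinner, so the goto-again retries feed each other indefinitely.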

Signed-off-by: Mike Galbraith <bitbucket@online.de>

---
 ipc/sem.c |   56 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 18 deletions(-)

Index: linux-2.6/ipc/sem.c
===================================================================
--- linux-2.6.orig/ipc/sem.c
+++ linux-2.6/ipc/sem.c
@@ -208,22 +208,11 @@ void __init sem_init (void)
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 			      int nsops)
 {
+	struct sem *sem;
 	int locknum;
- again:
-	if (nsops == 1 && !sma->complex_count) {
-		struct sem *sem = sma->sem_base + sops->sem_num;
-
-		/* Lock just the semaphore we are interested in. */
-		spin_lock(&sem->lock);
 
-		/*
-		 * If sma->complex_count was set while we were spinning,
-		 * we may need to look at things we did not lock here.
-		 */
-		if (unlikely(sma->complex_count)) {
-			spin_unlock(&sem->lock);
-			goto lock_array;
-		}
+	if (nsops == 1 && !sma->complex_count) {
+		sem = sma->sem_base + sops->sem_num;
 
 		/*
 		 * Another process is holding the global lock on the
@@ -231,9 +220,29 @@ static inline int sem_lock(struct sem_ar
 		 * but have to wait for the global lock to be released.
 		 */
 		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-			spin_unlock(&sem->lock);
-			spin_unlock_wait(&sma->sem_perm.lock);
-			goto again;
+			spin_lock(&sma->sem_perm.lock);
+			if (sma->complex_count)
+				goto wait_array;
+
+			/*
+			 * Acquiring our sem->lock under the global lock
+			 * forces new complex operations to wait for us
+			 * to exit our critical section.
+			 */
+			spin_lock(&sem->lock);
+			spin_unlock(&sma->sem_perm.lock);
+		} else {
+			/* Lock just the semaphore we are interested in. */
+			spin_lock(&sem->lock);
+
+			/*
+			 * If sma->complex_count was set prior to acquisition,
+			 * we must fall back to the global array lock.
+			 */
+			if (unlikely(sma->complex_count)) {
+				spin_unlock(&sem->lock);
+				goto lock_array;
+			}
 		}
 
 		locknum = sops->sem_num;
@@ -247,11 +256,22 @@ static inline int sem_lock(struct sem_ar
 		 */
  lock_array:
 		spin_lock(&sma->sem_perm.lock);
+ wait_array:
 		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *sem = sma->sem_base + i;
+			sem = sma->sem_base + i;
+#ifdef CONFIG_PREEMPT_RT_BASE
+			if (spin_is_locked(&sem->lock))
+#endif
 			spin_unlock_wait(&sem->lock);
 		}
 		locknum = -1;
+
+		if (nsops == 1 && !sma->complex_count) {
+			sem = sma->sem_base + sops->sem_num;
+			spin_lock(&sem->lock);
+			spin_unlock(&sma->sem_perm.lock);
+			locknum = sops->sem_num;
+		}
 	}
 	return locknum;
 }





* [patch 2/3] ipc/sem: revert ipc/sem: Rework semaphore wakeups
       [not found] <1379051751.5455.112.camel@marge.simpson.net>
  2013-09-13  6:12 ` [patch 1/3] ipc/sem: fix -rt livelock Mike Galbraith
@ 2013-09-13  6:12 ` Mike Galbraith
  2013-09-13  6:12 ` [patch 3/3] ipc/sem: Add -RT compatible wakeup scheme Mike Galbraith
  2 siblings, 0 replies; 15+ messages in thread
From: Mike Galbraith @ 2013-09-13  6:12 UTC (permalink / raw)
  To: linux-rt-users
  Cc: Manfred Spraul, Steven Rostedt, Thomas Gleixner,
	Sebastian Andrzej Siewior, Peter Zijlstra

ipc/sem: revert ipc/sem: Rework semaphore wakeups

Revert PeterZ's -rt wakeup scheme to prepare for its replacement with a new
completion-based scheme by Manfred Spraul.

<original changelog>
ipc/sem: Rework semaphore wakeups
Current sysv sems have a weird ass wakeup scheme that involves keeping
preemption disabled over a potential O(n^2) loop and busy waiting on
that on other CPUs.

Kill this and simply wake the task directly from under the sem_lock.

This was discovered by a migrate_disable() debug feature that
disallows:

  spin_lock();
  preempt_disable();
  spin_unlock()
  preempt_enable();
</original changelog>

Signed-off-by: Mike Galbraith <bitbucket@online.de>
---
 ipc/sem.c |   24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -155,7 +155,7 @@ static int sysvipc_sem_proc_show(struct
  *	sem_array.sem_pending{,last},
  *	sem_array.sem_undo: sem_lock() for read/write
  *	sem_undo.proc_next: only "current" is allowed to read/write that field.
- *
+ *
  */
 
 #define sc_semmsl	sem_ctls[0]
@@ -518,7 +518,7 @@ static int try_atomic_semop (struct sem_
 		curr = sma->sem_base + sop->sem_num;
 		sem_op = sop->sem_op;
 		result = curr->semval;
-
+
 		if (!sem_op && result)
 			goto would_block;
 
@@ -545,7 +545,7 @@ static int try_atomic_semop (struct sem_
 			un->semadj[sop->sem_num] -= sop->sem_op;
 		sop--;
 	}
-
+
 	return 0;
 
 out_of_range:
@@ -577,13 +577,6 @@ static int try_atomic_semop (struct sem_
 static void wake_up_sem_queue_prepare(struct list_head *pt,
 				struct sem_queue *q, int error)
 {
-#ifdef CONFIG_PREEMPT_RT_BASE
-	struct task_struct *p = q->sleeper;
-	get_task_struct(p);
-	q->status = error;
-	wake_up_process(p);
-	put_task_struct(p);
-#else
 	if (list_empty(pt)) {
 		/*
 		 * Hold preempt off so that we don't get preempted and have the
@@ -595,7 +588,6 @@ static void wake_up_sem_queue_prepare(st
 	q->pid = error;
 
 	list_add_tail(&q->list, pt);
-#endif
 }
 
 /**
@@ -609,7 +601,6 @@ static void wake_up_sem_queue_prepare(st
  */
 static void wake_up_sem_queue_do(struct list_head *pt)
 {
-#ifndef CONFIG_PREEMPT_RT_BASE
 	struct sem_queue *q, *t;
 	int did_something;
 
@@ -622,7 +613,6 @@ static void wake_up_sem_queue_do(struct
 	}
 	if (did_something)
 		preempt_enable();
-#endif
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
@@ -977,7 +967,7 @@ static int semctl_nolock(struct ipc_name
 		err = security_sem_semctl(NULL, cmd);
 		if (err)
 			return err;
-
+
 		memset(&seminfo,0,sizeof(seminfo));
 		seminfo.semmni = ns->sc_semmni;
 		seminfo.semmns = ns->sc_semmns;
@@ -997,7 +987,7 @@ static int semctl_nolock(struct ipc_name
 		}
 		max_id = ipc_get_maxid(&sem_ids(ns));
 		up_read(&sem_ids(ns).rw_mutex);
-		if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
+		if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0: max_id;
 	}
@@ -1672,7 +1662,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
 	 */


* [patch 3/3] ipc/sem: Add -RT compatible wakeup scheme
       [not found] <1379051751.5455.112.camel@marge.simpson.net>
  2013-09-13  6:12 ` [patch 1/3] ipc/sem: fix -rt livelock Mike Galbraith
  2013-09-13  6:12 ` [patch 2/3] ipc/sem: revert ipc/sem: Rework semaphore wakeups Mike Galbraith
@ 2013-09-13  6:12 ` Mike Galbraith
  2 siblings, 0 replies; 15+ messages in thread
From: Mike Galbraith @ 2013-09-13  6:12 UTC (permalink / raw)
  To: linux-rt-users
  Cc: Manfred Spraul, Steven Rostedt, Thomas Gleixner,
	Sebastian Andrzej Siewior, Peter Zijlstra

From: Manfred Spraul <manfred@colorfullife.com>

ipc/sem.c uses a custom wakeup scheme that relies on preempt_disable() to
prevent livelocks.  On -RT, this causes increased latencies and debug warnings.

A standard waitqueue is not used because a wait queue that waits with a
timeout (or with TASK_INTERRUPTIBLE) doesn't allow the caller of
add_wait_queue() to figure out whether the thread was woken up due to
expiry of the timer or due to an explicit wake_up() call.

a) The ipc code must know that in order to decide whether the task must
return with -EAGAIN/-EINTR or with 0.  Therefore ipc/sem.c, ipc/msg.c and
ipc/mqueue.c use custom queues.

b) These custom queues have a lockless fast path for a successful
operation - and this fast path must handle the various races that may
happen when the timer expires in parallel with a wake_up() call.

The patch:
- separates the wakeup scheme into helper functions
- adds a scheme built around a completion.

The preferred solution (use a spinlock instead of the completion) does not
work, because there is a limit of at most 256 concurrent spinlocks.
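
In outline, the handoff works like this (a condensed sketch of the RT
flavour of the helpers added below; it reuses names from the patch and
omits the error paths):

	/* Waker side (sketch), running without any locks held: */
	q->status = error;			/* publish the result code */
	wake_up_process(q->sleeper);
	queuewakeup_handsoff(&q->done);		/* complete_all(): q may
						 * disappear immediately  */

	/* Sleeper side (sketch), in semtimedop(), after waking up: */
	error = queue.status;
	if (error != -EINTR) {
		/* Someone completed the operation: wait for the hands-off,
		 * then return without touching the sem array. */
		queuewakeup_wait(&queue.done);	/* wait_for_completion() */
		goto out_free;
	}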

Mike: style adjustments which Manfred may or may not ack.  The credit is
his, the blame for any misapplication of his code is mine.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Mike Galbraith <bitbucket@online.de>
---
 ipc/sem.c |  204 +++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 123 insertions(+), 81 deletions(-)

--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -61,8 +61,8 @@
  * - A woken up task may not even touch the semaphore array anymore, it may
  *   have been destroyed already by a semctl(RMID).
  * - The synchronizations between wake-ups due to a timeout/signal and a
- *   wake-up due to a completed semaphore operation is achieved by using an
- *   intermediate state (IN_WAKEUP).
+ *   wake-up due to a completed semaphore operation is achieved by using a
+ *   special wakeup scheme (queuewakeup_wait and support functions)
  * - UNDO values are stored in an array (one per process and per
  *   semaphore array, lazily allocated). For backwards compatibility, multiple
  *   modes for the UNDO variables are supported (per process, per thread)
@@ -90,6 +90,84 @@
 #include <asm/uaccess.h>
 #include "util.h"
 
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+#define SYSVSEM_COMPLETION 1
+#else
+#define SYSVSEM_CUSTOM 1
+#endif
+
+#ifdef SYSVSEM_COMPLETION
+/* Using a completion causes some overhead, but avoids a busy loop
+ * that increases the worst case latency.
+ */
+struct queue_done {
+	struct completion done;
+};
+
+static void queuewakeup_prepare(void)
+{
+	/* no preparation necessary */
+}
+
+static void queuewakeup_completed(void)
+{
+	/* empty */
+}
+
+static void queuewakeup_handsoff(struct queue_done *qd)
+{
+	complete_all(&qd->done);
+}
+
+static void queuewakeup_init(struct queue_done *qd)
+{
+	init_completion(&qd->done);
+}
+
+static void queuewakeup_wait(struct queue_done *qd)
+{
+	wait_for_completion(&qd->done);
+}
+
+#elif defined(SYSVSEM_CUSTOM)
+struct queue_done {
+	atomic_t done;
+};
+
+static void queuewakeup_prepare(void)
+{
+	preempt_disable();
+}
+
+static void queuewakeup_completed(void)
+{
+	preempt_enable();
+}
+
+static void queuewakeup_handsoff(struct queue_done *qd)
+{
+	BUG_ON(atomic_read(&qd->done) != 2);
+	smp_mb();
+	atomic_set(&qd->done, 1);
+}
+
+static void queuewakeup_init(struct queue_done *qd)
+{
+	atomic_set(&qd->done, 2);
+}
+
+static void queuewakeup_wait(struct queue_done *qd)
+{
+	while (atomic_read(&qd->done) != 1)
+		cpu_relax();
+
+	smp_mb();
+}
+#else
+#error Unknown locking primitive
+#endif
+
 /* One semaphore structure for each semaphore in the system. */
 struct sem {
 	int	semval;		/* current value */
@@ -108,6 +186,7 @@ struct sem_queue {
 	struct sembuf		*sops;	 /* array of pending operations */
 	int			nsops;	 /* number of operations */
 	int			alter;	 /* does *sops alter the array? */
+	struct queue_done	done;	 /* completion synchronization */
 };
 
 /* Each task has a list of undo requests. They are executed automatically
@@ -354,27 +433,34 @@ static inline void sem_rmid(struct ipc_n
 
 /*
  * Lockless wakeup algorithm:
- * Without the check/retry algorithm a lockless wakeup is possible:
+ * The whole task of choosing tasks to wake up is done by the thread that
+ * does the wakeup. For the woken up thread, this makes the following
+ * lockless wakeup possible:
  * - queue.status is initialized to -EINTR before blocking.
  * - wakeup is performed by
- *	* unlinking the queue entry from sma->sem_pending
- *	* setting queue.status to IN_WAKEUP
- *	  This is the notification for the blocked thread that a
- *	  result value is imminent.
+ * 	* call queuewakeup_prepare() once. This is necessary to prevent
+ *	  livelocks.
+ *	* setting queue.status to the actual result code
  *	* call wake_up_process
- *	* set queue.status to the final value.
+ *	* queuewakeup_handsoff(&q->done);
+ *	* call queuewakeup_completed() once.
  * - the previously blocked thread checks queue.status:
- *   	* if it's IN_WAKEUP, then it must wait until the value changes
- *   	* if it's not -EINTR, then the operation was completed by
- *   	  update_queue. semtimedop can return queue.status without
- *   	  performing any operation on the sem array.
- *   	* otherwise it must acquire the spinlock and check what's up.
+ *	* if it's not -EINTR, then someone completed the operation.
+ *	  First, queuewakeup_wait() must be called. Afterwards,
+ *	  semtimedop must return queue.status without performing any
+ *	  operation on the sem array.
+ *	* otherwise it must acquire the spinlock and repeat the test
+ *	    (including: call queuewakeup_wait() if there is a return code)
+ *	* If it is still -EINTR, then no update_queue() completed the
+ *	    operation, thus semtimedop() can proceed normally.
  *
- * The two-stage algorithm is necessary to protect against the following
+ * queuewakeup_wait() is necessary to protect against the following
  * races:
  * - if queue.status is set after wake_up_process, then the woken up idle
  *   thread could race forward and try (and fail) to acquire sma->lock
- *   before update_queue had a chance to set queue.status
+ *   before update_queue had a chance to set queue.status.
+ *   More importantly, it would mean that wake_up_process must be done
+ *   while holding sma->lock, i.e. this would reduce the scalability.
  * - if queue.status is written before wake_up_process and if the
  *   blocked process is woken up by a signal between writing
  *   queue.status and the wake_up_process, then the woken up
@@ -384,7 +470,6 @@ static inline void sem_rmid(struct ipc_n
  *   (yes, this happened on s390 with sysv msg).
  *
  */
-#define IN_WAKEUP	1
 
 /**
  * newary - Create a new semaphore set
@@ -577,16 +662,10 @@ static int try_atomic_semop (struct sem_
 static void wake_up_sem_queue_prepare(struct list_head *pt,
 				struct sem_queue *q, int error)
 {
-	if (list_empty(pt)) {
-		/*
-		 * Hold preempt off so that we don't get preempted and have the
-		 * wakee busy-wait until we're scheduled back on.
-		 */
-		preempt_disable();
-	}
-	q->status = IN_WAKEUP;
-	q->pid = error;
+	if (list_empty(pt))
+		queuewakeup_prepare();
 
+	q->status = error;
 	list_add_tail(&q->list, pt);
 }
 
@@ -596,8 +675,8 @@ static void wake_up_sem_queue_prepare(st
  *
  * Do the actual wake-up.
  * The function is called without any locks held, thus the semaphore array
- * could be destroyed already and the tasks can disappear as soon as the
- * status is set to the actual return code.
+ * could be destroyed already and the tasks can disappear as soon as
+ * queuewakeup_handsoff() is called.
  */
 static void wake_up_sem_queue_do(struct list_head *pt)
 {
@@ -607,12 +686,11 @@ static void wake_up_sem_queue_do(struct
 	did_something = !list_empty(pt);
 	list_for_each_entry_safe(q, t, pt, list) {
 		wake_up_process(q->sleeper);
-		/* q can disappear immediately after writing q->status. */
-		smp_wmb();
-		q->status = q->pid;
+		/* q can disappear immediately after completing q->done */
+		queuewakeup_handsoff(&q->done);
 	}
 	if (did_something)
-		preempt_enable();
+		queuewakeup_completed();
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
@@ -1527,33 +1605,6 @@ static struct sem_undo *find_alloc_undo(
 	return un;
 }
 
-
-/**
- * get_queue_result - Retrieve the result code from sem_queue
- * @q: Pointer to queue structure
- *
- * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
- * q->status, then we must loop until the value is replaced with the final
- * value: This may happen if a task is woken up by an unrelated event (e.g.
- * signal) and in parallel the task is woken up by another task because it got
- * the requested semaphores.
- *
- * The function can be called with or without holding the semaphore spinlock.
- */
-static int get_queue_result(struct sem_queue *q)
-{
-	int error;
-
-	error = q->status;
-	while (unlikely(error == IN_WAKEUP)) {
-		cpu_relax();
-		error = q->status;
-	}
-
-	return error;
-}
-
-
 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		unsigned, nsops, const struct timespec __user *, timeout)
 {
@@ -1687,6 +1738,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
 
 	queue.status = -EINTR;
 	queue.sleeper = current;
+	queuewakeup_init(&queue.done);
 
 sleep_again:
 	current->state = TASK_INTERRUPTIBLE;
@@ -1698,17 +1750,14 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
 	else
 		schedule();
 
-	error = get_queue_result(&queue);
+	error = queue.status;
 
 	if (error != -EINTR) {
 		/* fast path: update_queue already obtained all requested
-		 * resources.
-		 * Perform a smp_mb(): User space could assume that semop()
-		 * is a memory barrier: Without the mb(), the cpu could
-		 * speculatively read in user space stale data that was
-		 * overwritten by the previous owner of the semaphore.
+		 * resources. Just ensure that update_queue completed
+		 * its access to &queue.
 		 */
-		smp_mb();
+		queuewakeup_wait(&queue.done);
 
 		goto out_free;
 	}
@@ -1719,26 +1768,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
 	/*
 	 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
 	 */
-	error = get_queue_result(&queue);
-
-	/*
-	 * Array removed? If yes, leave without sem_unlock().
-	 */
-	if (IS_ERR(sma)) {
+	error = queue.status;
+	if (error != -EINTR) {
+		/* If there is a return code, then we can leave immediately. */
+		if (!IS_ERR(sma)) {
+			/* sem_lock() succeeded - then unlock */
+			sem_unlock(sma, locknum);
+		}
 		rcu_read_unlock();
+		/* Except that we must wait for the hands-off */
+		queuewakeup_wait(&queue.done);
 		goto out_free;
 	}
 
-
-	/*
-	 * If queue.status != -EINTR we are woken up by another process.
-	 * Leave without unlink_queue(), but with sem_unlock().
-	 */
-
-	if (error != -EINTR) {
-		goto out_unlock_free;
-	}


* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-09-13  6:12 ` [patch 1/3] ipc/sem: fix -rt livelock Mike Galbraith
@ 2013-09-14 12:24   ` Manfred Spraul
  2013-09-15  3:41     ` Mike Galbraith
  2013-09-14 21:46   ` Manfred Spraul
  1 sibling, 1 reply; 15+ messages in thread
From: Manfred Spraul @ 2013-09-14 12:24 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Sebastian Andrzej Siewior, Peter Zijlstra

Hi Mike,

On 09/13/2013 08:12 AM, Mike Galbraith wrote:
> The goto-again loop can and does induce livelock in -rt.  Remove it.
>
> spin_unlock_wait(lock) in -rt kernels takes/releases the lock in question, so
> all it takes to create a self-perpetuating loop is for one task to start the
> ball rolling by taking the array lock; other tasks see this, and react by
> taking/releasing/retrying endlessly.
>
> Signed-off-by: Mike Galbraith <bitbucket@online.de>
>
> ---
>   ipc/sem.c |   56 ++++++++++++++++++++++++++++++++++++++------------------
>   1 file changed, 38 insertions(+), 18 deletions(-)
>
> Index: linux-2.6/ipc/sem.c
> ===================================================================
> --- linux-2.6.orig/ipc/sem.c
> +++ linux-2.6/ipc/sem.c
> @@ -247,11 +256,22 @@ static inline int sem_lock(struct sem_ar
>   		 */
>    lock_array:
>   		spin_lock(&sma->sem_perm.lock);
> + wait_array:
>   		for (i = 0; i < sma->sem_nsems; i++) {
> -			struct sem *sem = sma->sem_base + i;
> +			sem = sma->sem_base + i;
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +			if (spin_is_locked(&sem->lock))
> +#endif
>   			spin_unlock_wait(&sem->lock);
>   		}
>   
I don't like this part of the change:
It reads like a micro-optimization for spin_unlock_wait() within the 
ipc/sem.c code.

If spin_unlock_wait() for CONFIG_PREEMPT_RT_BASE is broken, then the 
implementation of spin_unlock_wait() should be fixed.

Something like
#define spin_unlock_wait(x) if(spin_is_locked(x)) __spin_unlock_wait(x)
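
or, in statement-safe form (just a sketch, with __spin_unlock_wait()
standing in for the existing unconditional implementation):

	/* sketch: skip the take/release cycle for uncontended locks */
	#define spin_unlock_wait(x)			\
		do {					\
			if (spin_is_locked(x))		\
				__spin_unlock_wait(x);	\
		} while (0)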

--
     Manfred


* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-09-13  6:12 ` [patch 1/3] ipc/sem: fix -rt livelock Mike Galbraith
  2013-09-14 12:24   ` Manfred Spraul
@ 2013-09-14 21:46   ` Manfred Spraul
  2013-09-15  4:45     ` Mike Galbraith
  1 sibling, 1 reply; 15+ messages in thread
From: Manfred Spraul @ 2013-09-14 21:46 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Sebastian Andrzej Siewior, Peter Zijlstra

Hi Mike,

On 09/13/2013 08:12 AM, Mike Galbraith wrote:
> The goto-again loop can and does induce livelock in -rt.  Remove it.
>
> spin_unlock_wait(lock) in -rt kernels takes/releases the lock in question, so
> all it takes to create a self-perpetuating loop is for one task to start the
> ball rolling by taking the array lock; other tasks see this, and react by
> taking/releasing/retrying endlessly.
I think your code inherits the race I just sent to you:
The test of complex_count must be after spin_is_locked().

http://marc.info/?l=linux-kernel&m=137919453307294

Could you check that?
Or alternatively: Is my proposed sem_lock() function -rt friendly?


>   		locknum = -1;
> +
> +		if (nsops == 1 && !sma->complex_count) {
> +			sem = sma->sem_base + sops->sem_num;
> +			spin_lock(&sem->lock);
> +			spin_unlock(&sma->sem_perm.lock);
> +			locknum = sops->sem_num;
> +		}
A clever idea:
If the decision that the slow path must be used proves to be a false
alarm, switch back to the fast path.
You can even move that block further up and skip the loop over all the
per-semaphore locks.
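
Roughly, reusing the labels from your patch (an untested sketch):

	 lock_array:
		spin_lock(&sma->sem_perm.lock);
	 wait_array:
		if (nsops == 1 && !sma->complex_count) {
			/* False alarm (untested sketch): no complex op is
			 * pending after all, so drop straight back to the
			 * fast path without cycling the per-sem locks. */
			sem = sma->sem_base + sops->sem_num;
			spin_lock(&sem->lock);
			spin_unlock(&sma->sem_perm.lock);
			return sops->sem_num;
		}
		for (i = 0; i < sma->sem_nsems; i++)
			spin_unlock_wait(&sma->sem_base[i].lock);
		locknum = -1;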

--
     Manfred


* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-09-14 12:24   ` Manfred Spraul
@ 2013-09-15  3:41     ` Mike Galbraith
  0 siblings, 0 replies; 15+ messages in thread
From: Mike Galbraith @ 2013-09-15  3:41 UTC (permalink / raw)
  To: Manfred Spraul
  Cc: linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Sebastian Andrzej Siewior, Peter Zijlstra

On Sat, 2013-09-14 at 14:24 +0200, Manfred Spraul wrote: 
> Hi Mike,

Hi,

> > Index: linux-2.6/ipc/sem.c
> > ===================================================================
> > --- linux-2.6.orig/ipc/sem.c
> > +++ linux-2.6/ipc/sem.c
> > @@ -247,11 +256,22 @@ static inline int sem_lock(struct sem_ar
> >   		 */
> >    lock_array:
> >   		spin_lock(&sma->sem_perm.lock);
> > + wait_array:
> >   		for (i = 0; i < sma->sem_nsems; i++) {
> > -			struct sem *sem = sma->sem_base + i;
> > +			sem = sma->sem_base + i;
> > +#ifdef CONFIG_PREEMPT_RT_BASE
> > +			if (spin_is_locked(&sem->lock))
> > +#endif
> >   			spin_unlock_wait(&sem->lock);
> >   		}
> >   
> I don't like this part of the change:

None of it is pretty, but the livelock is even less pretty ;-)

> It reads like a micro-optimization for spin_unlock_wait() within the 
> ipc/sem.c code.

It's exactly that, hoping to hammer fewer locks.

> If spin_unlock_wait() for CONFIG_PREEMPT_RT_BASE is broken, then the 
> implementation of spin_unlock_wait() should be fixed.

But it's not broken; taking the lock lets PI see/fix inversion.

Preemptible locks are (necessary) evil incarnate.

-Mike



* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-09-14 21:46   ` Manfred Spraul
@ 2013-09-15  4:45     ` Mike Galbraith
  2013-10-04 10:44       ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 15+ messages in thread
From: Mike Galbraith @ 2013-09-15  4:45 UTC (permalink / raw)
  To: Manfred Spraul
  Cc: linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Sebastian Andrzej Siewior, Peter Zijlstra

On Sat, 2013-09-14 at 23:46 +0200, Manfred Spraul wrote: 
> Hi Mike,
> 
> On 09/13/2013 08:12 AM, Mike Galbraith wrote:
> > The goto-again loop can and does induce livelock in -rt.  Remove it.
> >
> > spin_unlock_wait(lock) in -rt kernels takes/releases the lock in question, so
> > all it takes to create a self-perpetuating loop is for one task to start the
> > ball rolling by taking the array lock; other tasks see this, and react by
> > taking/releasing/retrying endlessly.
> I think your code inherits the race I just sent to you:
> The test of complex_count must be after spin_is_locked().
> 
> http://marc.info/?l=linux-kernel&m=137919453307294
> 
> Could you check that?

I think you're right.

> Or alternatively: Is my proposed sem_lock() function -rt friendly?

Some way of making spin_unlock_wait() _go away_ along with the livelock
would be better, but patches look good to me.  I'll apply both and stare
at the sum, and ask boxen what they think.. they're better at spotting
locking troubles ;-)

-Mike



* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-09-15  4:45     ` Mike Galbraith
@ 2013-10-04 10:44       ` Sebastian Andrzej Siewior
  2013-10-04 11:16         ` Mike Galbraith
  0 siblings, 1 reply; 15+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-10-04 10:44 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Manfred Spraul, linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Peter Zijlstra

* Mike Galbraith | 2013-09-15 06:45:40 [+0200]:

Hi,

>On Sat, 2013-09-14 at 23:46 +0200, Manfred Spraul wrote: 
>> Could you check that?
>
>I think you're right.
>
>> Or alternatively: Is my proposed sem_lock() function -rt friendly?
>
>Some way of making spin_unlock_wait() _go away_ along with the livelock
>would be better, but patches look good to me.  I'll apply both and stare
>at the sum, and ask boxen what they think.. they're better at spotting
>locking troubles ;-)

Will you post a new series of those three patches with the livelock
Manfred mentioned fixed, or should I look at this series?

>-Mike

Sebastian


* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-10-04 10:44       ` Sebastian Andrzej Siewior
@ 2013-10-04 11:16         ` Mike Galbraith
  2013-10-04 11:28           ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 15+ messages in thread
From: Mike Galbraith @ 2013-10-04 11:16 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Manfred Spraul, linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Peter Zijlstra

On Fri, 2013-10-04 at 12:44 +0200, Sebastian Andrzej Siewior wrote: 
> * Mike Galbraith | 2013-09-15 06:45:40 [+0200]:
> 
> Hi,
> 
> >On Sat, 2013-09-14 at 23:46 +0200, Manfred Spraul wrote: 
> >> Could you check that?
> >
> >I think you're right.
> >
> >> Or alternatively: Is my proposed sem_lock() function -rt friendly?
> >
> >Some way of making spin_unlock_wait() _go away_ along with the livelock
> >would be better, but patches look good to me.  I'll apply both and stare
> >at the sum, and ask boxen what they think.. they're better at spotting
> >locking troubles ;-)
> 
> Do you post a new series of those three patches with the live lock
> Manfred mentioned fixed or should I look at this serie?

Manfred's race fix also kills the loop, and thereby the -rt livelock, so the
only question is does -rt want to use his completion wakeup scheme, and
does it want to do something about spin_unlock_wait() unconditionally
grabbing/releasing every lock in the array.

-Mike



* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-10-04 11:16         ` Mike Galbraith
@ 2013-10-04 11:28           ` Sebastian Andrzej Siewior
  2013-10-04 13:55             ` Mike Galbraith
  0 siblings, 1 reply; 15+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-10-04 11:28 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Manfred Spraul, linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Peter Zijlstra

On 10/04/2013 01:16 PM, Mike Galbraith wrote:
> Manfred's race fix also kills the loop, and thereby the -rt livelock, so the
> only question is does -rt want to use his completion wakeup scheme, and
> does it want to do something about spin_unlock_wait() unconditionally
> grabbing/releasing every lock in the array.

So his patches went into v3.11 or so, right?  So all -RT releases after
this point will have it fixed, right?
However, if this is something that bugs people using -RT, it might be
worth looking at it.

> -Mike
> 
Sebastian


* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-10-04 11:28           ` Sebastian Andrzej Siewior
@ 2013-10-04 13:55             ` Mike Galbraith
  2013-10-04 14:02               ` Manfred Spraul
  0 siblings, 1 reply; 15+ messages in thread
From: Mike Galbraith @ 2013-10-04 13:55 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Manfred Spraul, linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Peter Zijlstra

On Fri, 2013-10-04 at 13:28 +0200, Sebastian Andrzej Siewior wrote: 
> On 10/04/2013 01:16 PM, Mike Galbraith wrote:
> > Manfred's race fix also kills the loop, and thereby the -rt livelock, so the
> > only question is does -rt want to use his completion wakeup scheme, and
> > does it want to do something about spin_unlock_wait() unconditionally
> > grabbing/releasing every lock in the array.
> 
> So his patches went into v3.11 or so, right?  So all -RT releases after
> this point will have it fixed, right?

Most fixes have gone into master.  There's still at least one in the
pipe, ipc/sem.c: synchronize semop and semctl with IPC_RMID, and I
_think_ there's still one open issue.

<quoting Manfred>
And now new:
1) ipc/namespace.c:
     free_ipcs() still assumes the "old style" free calls:
     rcu_lock and ipc_lock dropped within the callback.

     freeary() was converted - but free_ipcs was not updated.

     Thus:
     Closing a namespace with sem arrays and threads that are waiting on
     the array with semtimedop() and bad timing can deadlock the
     semtimedop thread.
     (i.e.: spin_lock() waiting forever).
</quoting Manfred>

3.10,11-stable need the lot.

-Mike



* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-10-04 13:55             ` Mike Galbraith
@ 2013-10-04 14:02               ` Manfred Spraul
  2013-10-04 14:08                 ` Mike Galbraith
  0 siblings, 1 reply; 15+ messages in thread
From: Manfred Spraul @ 2013-10-04 14:02 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	Thomas Gleixner, Peter Zijlstra

On 10/04/2013 03:55 PM, Mike Galbraith wrote:
> On Fri, 2013-10-04 at 13:28 +0200, Sebastian Andrzej Siewior wrote:
>> On 10/04/2013 01:16 PM, Mike Galbraith wrote:
>>> Manfred's race fix also kills the loop, and thereby the -rt livelock, so the
>>> only question is does -rt want to use his completion wakeup scheme, and
>>> does it want to do something about spin_unlock_wait() unconditionally
>>> grabbing/releasing every lock in the array.
>> So his patches went into v3.11 or so, right?  So all -RT releases after
>> this point will have it fixed, right?
> Most fixes have gone into master.  There's still at least one in the
> pipe, ipc/sem.c: synchronize semop and semctl with IPC_RMID, and I
> _think_ there's still one open issue.
>
> <quoting Manfred>
> And now new:
> 1) ipc/namespace.c:
>       free_ipcs() still assumes the "old style" free calls:
>       rcu_lock and ipc_lock dropped within the callback.
>
>       freeary() was converted - but free_ipcs was not updated.
>
>       Thus:
>       Closing a namespace with sem arrays and threads that are waiting
>       Closing a namespace with sem arrays and threads that are waiting on
>       the array with semtimedop() and bad timing can deadlock the
>       semtimedop thread.
> </quoting Manfred>
That one was a false alarm, i.e. everything is fixed in -mm.

--
	Manfred





* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-10-04 14:02               ` Manfred Spraul
@ 2013-10-04 14:08                 ` Mike Galbraith
  2013-10-11 15:36                   ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 15+ messages in thread
From: Mike Galbraith @ 2013-10-04 14:08 UTC (permalink / raw)
  To: Manfred Spraul
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	Thomas Gleixner, Peter Zijlstra

On Fri, 2013-10-04 at 16:02 +0200, Manfred Spraul wrote: 
> On 10/04/2013 03:55 PM, Mike Galbraith wrote:
> > On Fri, 2013-10-04 at 13:28 +0200, Sebastian Andrzej Siewior wrote:
> >> On 10/04/2013 01:16 PM, Mike Galbraith wrote:
> >>> Manfred's race fix also kills the loop, and thereby the -rt livelock, so the
> >>> only question is does -rt want to use his completion wakeup scheme, and
> >>> does it want to do something about spin_unlock_wait() unconditionally
> >>> grabbing/releasing every lock in the array.
> >> So his patches went into v3.11 or so, right?  So all -RT releases after
> >> this point will have it fixed, right?
> > Most fixes have gone into master.  There's still at least one in the
> > pipe, ipc/sem.c: synchronize semop and semctl with IPC_RMID, and I
> > _think_ there's still one open issue.
> >
> > <quoting Manfred>
> > And now new:
> > 1) ipc/namespace.c:
> >       free_ipcs() still assumes the "old style" free calls:
> >       rcu_lock and ipc_lock dropped within the callback.
> >
> >       freeary() was converted - but free_ipcs was not updated.
> >
> >       Thus:
> >       Closing a namespace with sem arrays and threads that are waiting on
> >       the array with semtimedop() and bad timing can deadlock the
> >       semtimedop thread.
> >       (i.e.: spin_lock() waiting forever).
> > </quoting Manfred>
> That one was a false alarm, i.e. everything is fixed in -mm.

Cool, that works out rather nicely.  I looked at fixing it, and couldn't
figure out what the heck was wrong with it :)

-Mike



* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-10-04 14:08                 ` Mike Galbraith
@ 2013-10-11 15:36                   ` Sebastian Andrzej Siewior
  2013-10-12  4:34                     ` Mike Galbraith
  0 siblings, 1 reply; 15+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-10-11 15:36 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Manfred Spraul, linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Peter Zijlstra

* Mike Galbraith | 2013-10-04 16:08:47 [+0200]:

>Cool, that works out rather nicely.  I looked at fixing it, and couldn't
>figure out what the heck was wrong with it :)

Is this the only one needed:

commit 5e9d527591421ccdb16acb8c23662231135d8686
Author: Manfred Spraul <manfred@colorfullife.com>
Date:   Mon Sep 30 13:45:04 2013 -0700

    ipc/sem.c: fix race in sem_lock()

Or is there more? This is the commit I've seen that is tagged stable and
for v3.10+.

>-Mike

Sebastian


* Re: [patch 1/3] ipc/sem: fix -rt livelock
  2013-10-11 15:36                   ` Sebastian Andrzej Siewior
@ 2013-10-12  4:34                     ` Mike Galbraith
  0 siblings, 0 replies; 15+ messages in thread
From: Mike Galbraith @ 2013-10-12  4:34 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Manfred Spraul, linux-rt-users, Steven Rostedt, Thomas Gleixner,
	Peter Zijlstra

On Fri, 2013-10-11 at 17:36 +0200, Sebastian Andrzej Siewior wrote:

> Is this the only one needed:
> 
> commit 5e9d527591421ccdb16acb8c23662231135d8686
> Author: Manfred Spraul <manfred@colorfullife.com>
> Date:   Mon Sep 30 13:45:04 2013 -0700
> 
>     ipc/sem.c: fix race in sem_lock()

That fixes the only killer I hit in generic beating.

> Or is there more? This is the commit I've seen that is tagged stable and
> for v3.10+.

It may also want part of this one... 
     53dad6d3a8e5ac1af8bacc6ac2134ae1a8b085f1 ipc: fix race with LSMs
..this one..
     d8c633766ad88527f25d9f81a5c2f083d78a2b39 ipc/sem.c: synchronize the proc interface
..and this one from mmotm.
     ipc-semc-synchronize-semop-and-semctl-with-ipc_rmid.patch

I didn't look hard though, that's just what sticks out when I look at the
ipc 3.10 patches wedged into my 3.10-rt list.

-Mike


