* [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file
From: Tim Chen @ 2013-11-08 19:51 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, Thomas Gleixner
  Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen,
	Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann,
	Aswin Chandramouleeswaran, Scott J Norton, Will Deacon,
	Figo.zhang

We will need the MCS lock code for doing optimistic spinning for rwsem
and queue rwlock.  Extracting the MCS code from mutex.c and putting it
into its own file allows us to reuse this code easily.
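
For reference, here is a minimal usage sketch of the resulting API.  It
mirrors how mutex.c uses it in the hunks below; the lock pointer and the
function name are only illustrative and not part of this patch:

	#include <linux/mcs_spinlock.h>

	struct mcs_spinlock *example_lock;	/* tail pointer, NULL when unlocked */

	void example_critical_section(void)
	{
		struct mcs_spinlock node;	/* queue node on the caller's stack */

		/* Enqueue; a contended caller spins on its own node->locked. */
		mcs_spin_lock(&example_lock, &node);
		/* ... critical section ... */
		/* Pass the lock to the next queued waiter, if any. */
		mcs_spin_unlock(&example_lock, &node);
	}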

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
---
 include/linux/mcs_spinlock.h |   64 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/mutex.h        |    5 ++-
 kernel/locking/mutex.c       |   60 ++++----------------------------------
 3 files changed, 74 insertions(+), 55 deletions(-)
 create mode 100644 include/linux/mcs_spinlock.h

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
new file mode 100644
index 0000000..b5de3b0
--- /dev/null
+++ b/include/linux/mcs_spinlock.h
@@ -0,0 +1,64 @@
+/*
+ * MCS lock defines
+ *
+ * This file contains the main data structure and API definitions of MCS lock.
+ *
+ * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
+ * with the desirable properties of being fair, and with each cpu trying
+ * to acquire the lock spinning on a local variable.
+ * It avoids expensive cache bouncings that common test-and-set spin-lock
+ * implementations incur.
+ */
+#ifndef __LINUX_MCS_SPINLOCK_H
+#define __LINUX_MCS_SPINLOCK_H
+
+struct mcs_spinlock {
+	struct mcs_spinlock *next;
+	int locked; /* 1 if lock acquired */
+};
+
+/*
+ * We don't inline mcs_spin_lock() so that perf can correctly account for the
+ * time spent in this lock function.
+ */
+static noinline
+void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+{
+	struct mcs_spinlock *prev;
+
+	/* Init node */
+	node->locked = 0;
+	node->next   = NULL;
+
+	prev = xchg(lock, node);
+	if (likely(prev == NULL)) {
+		/* Lock acquired */
+		node->locked = 1;
+		return;
+	}
+	ACCESS_ONCE(prev->next) = node;
+	smp_wmb();
+	/* Wait until the lock holder passes the lock down */
+	while (!ACCESS_ONCE(node->locked))
+		arch_mutex_cpu_relax();
+}
+
+static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+{
+	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+
+	if (likely(!next)) {
+		/*
+		 * Release the lock by setting it to NULL
+		 */
+		if (cmpxchg(lock, node, NULL) == node)
+			return;
+		/* Wait until the next pointer is set */
+		while (!(next = ACCESS_ONCE(node->next)))
+			arch_mutex_cpu_relax();
+	}
+	ACCESS_ONCE(next->locked) = 1;
+	smp_wmb();
+}
+
+#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index bab49da..32a32e6 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -46,6 +46,7 @@
  * - detects multi-task circular deadlocks and prints out all affected
  *   locks and tasks (and only those tasks)
  */
+struct mcs_spinlock;
 struct mutex {
 	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
 	atomic_t		count;
@@ -55,7 +56,7 @@ struct mutex {
 	struct task_struct	*owner;
 #endif
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-	void			*spin_mlock;	/* Spinner MCS lock */
+	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	const char 		*name;
@@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 # define arch_mutex_cpu_relax() cpu_relax()
 #endif
 
-#endif
+#endif /* __LINUX_MUTEX_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index d24105b..e08b183 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,6 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
+#include <linux/mcs_spinlock.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	INIT_LIST_HEAD(&lock->wait_list);
 	mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-	lock->spin_mlock = NULL;
+	lock->mcs_lock = NULL;
 #endif
 
 	debug_mutex_init(lock, name, key);
@@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
  * more or less simultaneously, the spinners need to acquire a MCS lock
  * first before spinning on the owner field.
  *
- * We don't inline mspin_lock() so that perf can correctly account for the
- * time spent in this lock function.
  */
-struct mspin_node {
-	struct mspin_node *next ;
-	int		  locked;	/* 1 if lock acquired */
-};
-#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
-
-static noinline
-void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
-{
-	struct mspin_node *prev;
-
-	/* Init node */
-	node->locked = 0;
-	node->next   = NULL;
-
-	prev = xchg(lock, node);
-	if (likely(prev == NULL)) {
-		/* Lock acquired */
-		node->locked = 1;
-		return;
-	}
-	ACCESS_ONCE(prev->next) = node;
-	smp_wmb();
-	/* Wait until the lock holder passes the lock down */
-	while (!ACCESS_ONCE(node->locked))
-		arch_mutex_cpu_relax();
-}
-
-static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
-{
-	struct mspin_node *next = ACCESS_ONCE(node->next);
-
-	if (likely(!next)) {
-		/*
-		 * Release the lock by setting it to NULL
-		 */
-		if (cmpxchg(lock, node, NULL) == node)
-			return;
-		/* Wait until the next pointer is set */
-		while (!(next = ACCESS_ONCE(node->next)))
-			arch_mutex_cpu_relax();
-	}
-	ACCESS_ONCE(next->locked) = 1;
-	smp_wmb();
-}
 
 /*
  * Mutex spinning code migrated from kernel/sched/core.c
@@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
 	for (;;) {
 		struct task_struct *owner;
-		struct mspin_node  node;
+		struct mcs_spinlock  node;
 
 		if (use_ww_ctx && ww_ctx->acquired > 0) {
 			struct ww_mutex *ww;
@@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		mspin_lock(MLOCK(lock), &node);
+		mcs_spin_lock(&lock->mcs_lock, &node);
 		owner = ACCESS_ONCE(lock->owner);
 		if (owner && !mutex_spin_on_owner(lock, owner)) {
-			mspin_unlock(MLOCK(lock), &node);
+			mcs_spin_unlock(&lock->mcs_lock, &node);
 			goto slowpath;
 		}
 
@@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 			}
 
 			mutex_set_owner(lock);
-			mspin_unlock(MLOCK(lock), &node);
+			mcs_spin_unlock(&lock->mcs_lock, &node);
 			preempt_enable();
 			return 0;
 		}
-		mspin_unlock(MLOCK(lock), &node);
+		mcs_spin_unlock(&lock->mcs_lock, &node);
 
 		/*
 		 * When there's no owner, we might have preempted between the
-- 
1.7.4.4

* [PATCH v5 2/4] MCS Lock: optimizations and extra comments
From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, Thomas Gleixner
  Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen,
	Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann,
	Aswin Chandramouleeswaran, Scott J Norton, Will Deacon,
	Figo.zhang

From: Jason Low <jason.low2@hp.com>

Remove an unnecessary operation (setting node->locked to 1 in the
uncontended fast path, where nothing ever reads the acquirer's own
node->locked) and make the cmpxchg(lock, node, NULL) == node check in
mcs_spin_unlock() likely(), since the race it guards against (a new
waiter enqueueing itself between the unlocker's node->next check and
the cmpxchg) rarely occurs.

Also add more comments describing how the local node is used in MCS
locks.

Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Jason Low <jason.low2@hp.com>
---
 include/linux/mcs_spinlock.h |   13 +++++++++++--
 1 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index b5de3b0..96f14299 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -18,6 +18,12 @@ struct mcs_spinlock {
 };
 
 /*
+ * In order to acquire the lock, the caller should declare a local node and
+ * pass a reference of the node to this function in addition to the lock.
+ * If the lock has already been acquired, then this will proceed to spin
+ * on this node->locked until the previous lock holder sets the node->locked
+ * in mcs_spin_unlock().
+ *
  * We don't inline mcs_spin_lock() so that perf can correctly account for the
  * time spent in this lock function.
  */
@@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	prev = xchg(lock, node);
 	if (likely(prev == NULL)) {
 		/* Lock acquired */
-		node->locked = 1;
 		return;
 	}
 	ACCESS_ONCE(prev->next) = node;
@@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 		arch_mutex_cpu_relax();
 }
 
+/*
+ * Releases the lock. The caller should pass in the corresponding node that
+ * was used to acquire the lock.
+ */
 static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
 	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
@@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
 		/*
 		 * Release the lock by setting it to NULL
 		 */
-		if (cmpxchg(lock, node, NULL) == node)
+		if (likely(cmpxchg(lock, node, NULL) == node))
 			return;
 		/* Wait until the next pointer is set */
 		while (!(next = ACCESS_ONCE(node->next)))
-- 
1.7.4.4

* [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file
From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, Thomas Gleixner
  Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen,
	Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann,
	Aswin Chandramouleeswaran, Scott J Norton, Will Deacon,
	Figo.zhang

From: Waiman Long <Waiman.Long@hp.com>

The following changes are made:

1) Create a new mcs_spinlock.c file to contain the
   mcs_spin_lock() and mcs_spin_unlock() functions.
2) Include a number of prerequisite header files and define
   arch_mutex_cpu_relax(), if it is not previously defined, so that
   the mcs functions can be compiled for multiple architectures
   without causing problems.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 include/linux/mcs_spinlock.h                       |   56 ++------------------
 kernel/locking/Makefile                            |    6 +-
 .../locking/mcs_spinlock.c                         |   31 ++++++-----
 3 files changed, 23 insertions(+), 70 deletions(-)
 copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%)

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index 96f14299..d54bb23 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -17,57 +17,9 @@ struct mcs_spinlock {
 	int locked; /* 1 if lock acquired */
 };
 
-/*
- * In order to acquire the lock, the caller should declare a local node and
- * pass a reference of the node to this function in addition to the lock.
- * If the lock has already been acquired, then this will proceed to spin
- * on this node->locked until the previous lock holder sets the node->locked
- * in mcs_spin_unlock().
- *
- * We don't inline mcs_spin_lock() so that perf can correctly account for the
- * time spent in this lock function.
- */
-static noinline
-void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
-	struct mcs_spinlock *prev;
-
-	/* Init node */
-	node->locked = 0;
-	node->next   = NULL;
-
-	prev = xchg(lock, node);
-	if (likely(prev == NULL)) {
-		/* Lock acquired */
-		return;
-	}
-	ACCESS_ONCE(prev->next) = node;
-	smp_wmb();
-	/* Wait until the lock holder passes the lock down */
-	while (!ACCESS_ONCE(node->locked))
-		arch_mutex_cpu_relax();
-}
-
-/*
- * Releases the lock. The caller should pass in the corresponding node that
- * was used to acquire the lock.
- */
-static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
-	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
-
-	if (likely(!next)) {
-		/*
-		 * Release the lock by setting it to NULL
-		 */
-		if (likely(cmpxchg(lock, node, NULL) == node))
-			return;
-		/* Wait until the next pointer is set */
-		while (!(next = ACCESS_ONCE(node->next)))
-			arch_mutex_cpu_relax();
-	}
-	ACCESS_ONCE(next->locked) = 1;
-	smp_wmb();
-}
+extern
+void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
+extern
+void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
 
 #endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index baab8e5..20d9d5c 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o
 ifeq ($(CONFIG_PROC_FS),y)
 obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
 endif
-obj-$(CONFIG_SMP) += spinlock.o
-obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
+obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o
+obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o
 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
-obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
+obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c
similarity index 78%
copy from include/linux/mcs_spinlock.h
copy to kernel/locking/mcs_spinlock.c
index 96f14299..b6f27f8 100644
--- a/include/linux/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.c
@@ -1,7 +1,5 @@
 /*
- * MCS lock defines
- *
- * This file contains the main data structure and API definitions of MCS lock.
+ * MCS lock
  *
  * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
  * with the desirable properties of being fair, and with each cpu trying
@@ -9,13 +7,20 @@
  * It avoids expensive cache bouncings that common test-and-set spin-lock
  * implementations incur.
  */
-#ifndef __LINUX_MCS_SPINLOCK_H
-#define __LINUX_MCS_SPINLOCK_H
+/*
+ * asm/processor.h may define arch_mutex_cpu_relax().
+ * If it is not defined, cpu_relax() will be used.
+ */
+#include <asm/barrier.h>
+#include <asm/cmpxchg.h>
+#include <asm/processor.h>
+#include <linux/compiler.h>
+#include <linux/mcs_spinlock.h>
+#include <linux/export.h>
 
-struct mcs_spinlock {
-	struct mcs_spinlock *next;
-	int locked; /* 1 if lock acquired */
-};
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
+#endif
 
 /*
  * In order to acquire the lock, the caller should declare a local node and
@@ -23,11 +28,7 @@ struct mcs_spinlock {
  * If the lock has already been acquired, then this will proceed to spin
  * on this node->locked until the previous lock holder sets the node->locked
  * in mcs_spin_unlock().
- *
- * We don't inline mcs_spin_lock() so that perf can correctly account for the
- * time spent in this lock function.
  */
-static noinline
 void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
 	struct mcs_spinlock *prev;
@@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	while (!ACCESS_ONCE(node->locked))
 		arch_mutex_cpu_relax();
 }
+EXPORT_SYMBOL_GPL(mcs_spin_lock);
 
 /*
  * Releases the lock. The caller should pass in the corresponding node that
@@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
 	ACCESS_ONCE(next->locked) = 1;
 	smp_wmb();
 }
-
-#endif /* __LINUX_MCS_SPINLOCK_H */
+EXPORT_SYMBOL_GPL(mcs_spin_unlock);
-- 
1.7.4.4

* [PATCH v5 4/4] MCS Lock: Barrier corrections
From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, Thomas Gleixner
  Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen,
	Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann,
	Aswin Chandramouleeswaran, Scott J Norton, Will Deacon,
	Figo.zhang

From: Waiman Long <Waiman.Long@hp.com>

This patch corrects the way memory barriers are used in the MCS lock,
using the smp_load_acquire and smp_store_release functions, and removes
barriers that are not needed.

It uses architecture-specific load-acquire and store-release primitives
for synchronization, if available. Generic implementations are provided
as a fallback in case they are not defined, even though they may not be
optimal. These generic implementations could be removed later on, once
changes are made in all the relevant header files.
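
As an illustration of the intended pairing (a sketch only, not part of
this patch; shared_data and the two helper functions are made-up names):
the store-release in the unlocker keeps the critical section's writes
before the handoff, and the load-acquire in the waiter keeps its later
reads after it observes the handoff.

	static int shared_data;		/* illustrative data protected by the lock */

	static void unlocker_side(struct mcs_spinlock *next)
	{
		shared_data = 42;			/* critical-section write ...        */
		smp_store_release(&next->locked, 1);	/* ... is visible before the handoff */
	}

	static int waiter_side(struct mcs_spinlock *node)
	{
		while (!smp_load_acquire(&node->locked))
			arch_mutex_cpu_relax();
		return shared_data;		/* guaranteed to observe 42 */
	}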

Suggested-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
 1 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
index b6f27f8..df5c167 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/mcs_spinlock.c
@@ -23,6 +23,31 @@
 #endif
 
 /*
+ * Fall back to use the regular atomic operations and memory barrier if
+ * the acquire/release versions are not defined.
+ */
+#ifndef	xchg_acquire
+# define xchg_acquire(p, v)		xchg(p, v)
+#endif
+
+#ifndef	smp_load_acquire
+# define smp_load_acquire(p)				\
+	({						\
+		typeof(*p) __v = ACCESS_ONCE(*(p));	\
+		smp_mb();				\
+		__v;					\
+	})
+#endif
+
+#ifndef smp_store_release
+# define smp_store_release(p, v)		\
+	do {					\
+		smp_mb();			\
+		ACCESS_ONCE(*(p)) = v;		\
+	} while (0)
+#endif
+
+/*
  * In order to acquire the lock, the caller should declare a local node and
  * pass a reference of the node to this function in addition to the lock.
  * If the lock has already been acquired, then this will proceed to spin
@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg(lock, node);
+	/* xchg() provides a memory barrier */
+	prev = xchg_acquire(lock, node);
 	if (likely(prev == NULL)) {
 		/* Lock acquired */
 		return;
 	}
 	ACCESS_ONCE(prev->next) = node;
-	smp_wmb();
-	/* Wait until the lock holder passes the lock down */
-	while (!ACCESS_ONCE(node->locked))
+	/*
+	 * Wait until the lock holder passes the lock down.
+	 * Using smp_load_acquire() provides a memory barrier that
+	 * ensures subsequent operations happen after the lock is acquired.
+	 */
+	while (!(smp_load_acquire(&node->locked)))
 		arch_mutex_cpu_relax();
 }
 EXPORT_SYMBOL_GPL(mcs_spin_lock);
@@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock);
  * Releases the lock. The caller should pass in the corresponding node that
  * was used to acquire the lock.
  */
-static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
 	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
 
@@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
 		while (!(next = ACCESS_ONCE(node->next)))
 			arch_mutex_cpu_relax();
 	}
-	ACCESS_ONCE(next->locked) = 1;
-	smp_wmb();
+	/*
+	 * Pass lock to next waiter.
+	 * smp_store_release() provides a memory barrier to ensure
+	 * all operations in the critical section has been completed
+	 * before unlocking.
+	 */
+	smp_store_release(&next->locked , 1);
 }
 EXPORT_SYMBOL_GPL(mcs_spin_unlock);
-- 
1.7.4.4



^ permalink raw reply related	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-08 19:52   ` Tim Chen
@ 2013-11-11 18:10     ` Will Deacon
  -1 siblings, 0 replies; 70+ messages in thread
From: Will Deacon @ 2013-11-11 18:10 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T

Hello,

On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote:
> From: Waiman Long <Waiman.Long@hp.com>
> 
> This patch corrects the way memory barriers are used in the MCS lock
> with smp_load_acquire and smp_store_release functions.
> It removes ones that are not needed.
> 
> It uses architecture specific load-acquire and store-release
> primitives for synchronization, if available. Generic implementations
> are provided in case they are not defined even though they may not
> be optimal. These generic implementations could be removed later on
> once changes are made in all the relevant header files.
> 
> Suggested-by: Michel Lespinasse <walken@google.com>
> Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> Signed-off-by: Jason Low <jason.low2@hp.com>
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> ---
>  kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
>  1 files changed, 41 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> index b6f27f8..df5c167 100644
> --- a/kernel/locking/mcs_spinlock.c
> +++ b/kernel/locking/mcs_spinlock.c
> @@ -23,6 +23,31 @@
>  #endif
>  
>  /*
> + * Fall back to use the regular atomic operations and memory barrier if
> + * the acquire/release versions are not defined.
> + */
> +#ifndef	xchg_acquire
> +# define xchg_acquire(p, v)		xchg(p, v)
> +#endif
> +
> +#ifndef	smp_load_acquire
> +# define smp_load_acquire(p)				\
> +	({						\
> +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> +		smp_mb();				\
> +		__v;					\
> +	})
> +#endif
> +
> +#ifndef smp_store_release
> +# define smp_store_release(p, v)		\
> +	do {					\
> +		smp_mb();			\
> +		ACCESS_ONCE(*(p)) = v;		\
> +	} while (0)
> +#endif

PeterZ already has a series implementing acquire/release accessors, so you
should probably take a look at that rather than rolling your own here.

You could then augment that with [cmp]xchg_{acquire,release} as
appropriate.

> +/*
>   * In order to acquire the lock, the caller should declare a local node and
>   * pass a reference of the node to this function in addition to the lock.
>   * If the lock has already been acquired, then this will proceed to spin
> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  	node->locked = 0;
>  	node->next   = NULL;
>  
> -	prev = xchg(lock, node);
> +	/* xchg() provides a memory barrier */
> +	prev = xchg_acquire(lock, node);
>  	if (likely(prev == NULL)) {
>  		/* Lock acquired */
>  		return;
>  	}
>  	ACCESS_ONCE(prev->next) = node;
> -	smp_wmb();
> -	/* Wait until the lock holder passes the lock down */
> -	while (!ACCESS_ONCE(node->locked))
> +	/*
> +	 * Wait until the lock holder passes the lock down.
> +	 * Using smp_load_acquire() provides a memory barrier that
> +	 * ensures subsequent operations happen after the lock is acquired.
> +	 */
> +	while (!(smp_load_acquire(&node->locked)))
>  		arch_mutex_cpu_relax();

After a chat with some micro-architects, I'm going to have to disagree with
Paul here. For architectures where acquire/release are implemented with
explicit barriers (similarly for simple microarchitectures), emitting
barriers in a loop *is* going to have an effect on overall performance,
since those barriers may well result in traffic outside of the core (at
least, on ARM).

Thinking more about that, the real issue here is that arch_mutex_cpu_relax()
doesn't have a corresponding hook on the unlock side. On ARM, for example,
we can enter a low-power state using the wfe instruction, but that requires
the unlocker to wake up the core when the lock is released.

So, although I'm completely in favour of introducing acquire/release
accessors, I really think the mcs locking routines would benefit from
some arch-specific backend code, even if it's optional (although I would
imagine most architectures implementing something to improve power and/or
performance).

Will
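
A rough sketch of the optional arch backend being asked for here; the
arch_mcs_spin_wait()/arch_mcs_spin_wake() names are made up for illustration
and are not an existing kernel interface.  The generic fallbacks simply keep
today's behaviour:

	#ifndef arch_mcs_spin_wait
	# define arch_mcs_spin_wait(lp)	arch_mutex_cpu_relax()	/* generic: just relax */
	#endif
	#ifndef arch_mcs_spin_wake
	# define arch_mcs_spin_wake()	do { } while (0)	/* generic: nothing to signal */
	#endif

	/* waiter side */
	while (!smp_load_acquire(&node->locked))
		arch_mcs_spin_wait(&node->locked);

	/* unlocker side, after handing the lock to the next waiter */
	smp_store_release(&next->locked, 1);
	arch_mcs_spin_wake();		/* e.g. wake a wfe-based waiter on ARM */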


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-11 18:10     ` Will Deacon
@ 2013-11-11 18:20       ` Peter Zijlstra
  -1 siblings, 0 replies; 70+ messages in thread
From: Peter Zijlstra @ 2013-11-11 18:20 UTC (permalink / raw)
  To: Will Deacon
  Cc: Tim Chen, Ingo Molnar, Andrew Morton, Thomas Gleixner,
	linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel,
	Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin,
	H.Peter

On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote:
> > +	/*
> > +	 * Wait until the lock holder passes the lock down.
> > +	 * Using smp_load_acquire() provides a memory barrier that
> > +	 * ensures subsequent operations happen after the lock is acquired.
> > +	 */
> > +	while (!(smp_load_acquire(&node->locked)))
> >  		arch_mutex_cpu_relax();

> Thinking more about that, the real issue here is that arch_mutex_cpu_relax()
> doesn't have a corresponding hook on the unlock side. On ARM, for example,
> we can enter a low-power state using the wfe instruction, but that requires
> the unlocker to wake up the core when the lock is released.

That said, it would be ever so awesome if we could come to some sort of
conclusion on control dependencies here.

I _know_ C/C++ doesn't do them, but at the end of the day the compiler
still generates ASM and as long as we're relatively certain there's a
loop there (there has to be, right? :-), we could maybe rely on it
anyway.
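
As a purely hypothetical illustration of the control dependency in question
(first_store_in_cs is a made-up name): on current hardware the conditional
branch that ends the spin orders the final load before later stores, though
not before later loads, and the C standard itself guarantees neither:

	while (!ACCESS_ONCE(node->locked))	/* load that feeds the branch */
		arch_mutex_cpu_relax();
	ACCESS_ONCE(first_store_in_cs) = 1;	/* a store after the branch cannot
						 * become visible before the load
						 * above completes */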



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-11 18:10     ` Will Deacon
@ 2013-11-11 21:17       ` Tim Chen
  -1 siblings, 0 replies; 70+ messages in thread
From: Tim Chen @ 2013-11-11 21:17 UTC (permalink / raw)
  To: Will Deacon
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T,
	George Spelvin, H.Peter

On Mon, 2013-11-11 at 18:10 +0000, Will Deacon wrote:
> Hello,
> 
> On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote:
> > From: Waiman Long <Waiman.Long@hp.com>
> > 
> > This patch corrects the way memory barriers are used in the MCS lock
> > with smp_load_acquire and smp_store_release functions.
> > It removes ones that are not needed.
> > 
> > It uses architecture specific load-acquire and store-release
> > primitives for synchronization, if available. Generic implementations
> > are provided in case they are not defined even though they may not
> > be optimal. These generic implementations could be removed later on
> > once changes are made in all the relevant header files.
> > 
> > Suggested-by: Michel Lespinasse <walken@google.com>
> > Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> > Signed-off-by: Jason Low <jason.low2@hp.com>
> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> > ---
> >  kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
> >  1 files changed, 41 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> > index b6f27f8..df5c167 100644
> > --- a/kernel/locking/mcs_spinlock.c
> > +++ b/kernel/locking/mcs_spinlock.c
> > @@ -23,6 +23,31 @@
> >  #endif
> >  
> >  /*
> > + * Fall back to use the regular atomic operations and memory barrier if
> > + * the acquire/release versions are not defined.
> > + */
> > +#ifndef	xchg_acquire
> > +# define xchg_acquire(p, v)		xchg(p, v)
> > +#endif
> > +
> > +#ifndef	smp_load_acquire
> > +# define smp_load_acquire(p)				\
> > +	({						\
> > +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> > +		smp_mb();				\
> > +		__v;					\
> > +	})
> > +#endif
> > +
> > +#ifndef smp_store_release
> > +# define smp_store_release(p, v)		\
> > +	do {					\
> > +		smp_mb();			\
> > +		ACCESS_ONCE(*(p)) = v;		\
> > +	} while (0)
> > +#endif
> 
> PeterZ already has a series implementing acquire/release accessors, so you
> should probably take a look at that rather than rolling your own here.

Yes, we are using Peter Z's implementation here.  The above is for anything
where smp_load_acquire and smp_store_release are *not* defined.  We can
remove this once all architectures implement the acquire and release 
functions as mentioned in the comments of the patch.

> 
> You could then augment that with [cmp]xchg_{acquire,release} as
> appropriate.
> 
> > +/*
> >   * In order to acquire the lock, the caller should declare a local node and
> >   * pass a reference of the node to this function in addition to the lock.
> >   * If the lock has already been acquired, then this will proceed to spin
> > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  	node->locked = 0;
> >  	node->next   = NULL;
> >  
> > -	prev = xchg(lock, node);
> > +	/* xchg() provides a memory barrier */
> > +	prev = xchg_acquire(lock, node);
> >  	if (likely(prev == NULL)) {
> >  		/* Lock acquired */
> >  		return;
> >  	}
> >  	ACCESS_ONCE(prev->next) = node;
> > -	smp_wmb();
> > -	/* Wait until the lock holder passes the lock down */
> > -	while (!ACCESS_ONCE(node->locked))
> > +	/*
> > +	 * Wait until the lock holder passes the lock down.
> > +	 * Using smp_load_acquire() provides a memory barrier that
> > +	 * ensures subsequent operations happen after the lock is acquired.
> > +	 */
> > +	while (!(smp_load_acquire(&node->locked)))
> >  		arch_mutex_cpu_relax();
> 

An alternate implementation is
	while (!ACCESS_ONCE(node->locked))
		arch_mutex_cpu_relax();
	smp_load_acquire(&node->locked);

Leaving the smp_load_acquire at the end provides the appropriate barrier.
Will that be acceptable?

Tim
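
A slightly more explicit rendering of that alternative, purely as a sketch;
the value of the trailing load is ignored and only its ACQUIRE ordering is
wanted:

	while (!ACCESS_ONCE(node->locked))	/* spin with plain loads */
		arch_mutex_cpu_relax();
	(void)smp_load_acquire(&node->locked);	/* one acquire after the loop
						 * orders the critical section
						 * after the handoff */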

> After a chat with some micro-architects, I'm going to have to disagree with
> Paul here. For architectures where acquire/release are implemented with
> explicit barriers (similarly for simple microarchitectures), emitting
> barriers in a loop *is* going to have an effect on overall performance,
> since those barriers may well result in traffic outside of the core (at
> least, on ARM).
> 
> Thinking more about that, the real issue here is that arch_mutex_cpu_relax()
> doesn't have a corresponding hook on the unlock side. On ARM, for example,
> we can enter a low-power state using the wfe instruction, but that requires
> the unlocker to wake up the core when the lock is released.
> 
> So, although I'm completely in favour of introducing acquire/release
> accessors, I really think the mcs locking routines would benefit from
> some arch-specific backend code, even if it's optional (although I would
> imagine most architectures implementing something to improve power and/or
> performance).
> 
> Will



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-11 21:17       ` Tim Chen
@ 2013-11-12  1:57         ` Waiman Long
  -1 siblings, 0 replies; 70+ messages in thread
From: Waiman Long @ 2013-11-12  1:57 UTC (permalink / raw)
  To: Tim Chen, Peter Zijlstra
  Cc: Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner,
	linux-kernel, linux-mm, linux-arch, Linus Torvalds,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel,
	Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd

On 11/11/2013 04:17 PM, Tim Chen wrote:
>> You could then augment that with [cmp]xchg_{acquire,release} as
>> appropriate.
>>
>>> +/*
>>>    * In order to acquire the lock, the caller should declare a local node and
>>>    * pass a reference of the node to this function in addition to the lock.
>>>    * If the lock has already been acquired, then this will proceed to spin
>>> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>>>   	node->locked = 0;
>>>   	node->next   = NULL;
>>>
>>> -	prev = xchg(lock, node);
>>> +	/* xchg() provides a memory barrier */
>>> +	prev = xchg_acquire(lock, node);
>>>   	if (likely(prev == NULL)) {
>>>   		/* Lock acquired */
>>>   		return;
>>>   	}
>>>   	ACCESS_ONCE(prev->next) = node;
>>> -	smp_wmb();
>>> -	/* Wait until the lock holder passes the lock down */
>>> -	while (!ACCESS_ONCE(node->locked))
>>> +	/*
>>> +	 * Wait until the lock holder passes the lock down.
>>> +	 * Using smp_load_acquire() provides a memory barrier that
>>> +	 * ensures subsequent operations happen after the lock is acquired.
>>> +	 */
>>> +	while (!(smp_load_acquire(&node->locked)))
>>>   		arch_mutex_cpu_relax();
> An alternate implementation is
> 	while (!ACCESS_ONCE(node->locked))
> 		arch_mutex_cpu_relax();
> 	smp_load_acquire(&node->locked);
>
> Leaving the smp_load_acquire at the end to provide appropriate barrier.
> Will that be acceptable?
>
> Tim

I second Tim's opinion. It will be helpful to have an smp_mb_load_acquire()
function that provides a memory barrier with load-acquire semantics. I
don't think we need one for store-release, as that will not be in a loop.

Peter, what do you think about adding that to your patch?

-Longman
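
One possible shape for the helper being proposed, as a sketch only (the name
and the smp_mb() fallback are assumptions, not an agreed interface):

	#ifndef smp_mb_load_acquire
	# define smp_mb_load_acquire()	smp_mb()	/* generic: full barrier */
	#endif

	while (!ACCESS_ONCE(node->locked))
		arch_mutex_cpu_relax();
	smp_mb_load_acquire();	/* give the last plain load acquire semantics */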


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-11 21:17       ` Tim Chen
@ 2013-11-12 16:08         ` Will Deacon
  -1 siblings, 0 replies; 70+ messages in thread
From: Will Deacon @ 2013-11-12 16:08 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T

On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote:
> On Mon, 2013-11-11 at 18:10 +0000, Will Deacon wrote:
> > On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote:
> > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> > > index b6f27f8..df5c167 100644
> > > --- a/kernel/locking/mcs_spinlock.c
> > > +++ b/kernel/locking/mcs_spinlock.c
> > > @@ -23,6 +23,31 @@
> > >  #endif
> > >  
> > >  /*
> > > + * Fall back to use the regular atomic operations and memory barrier if
> > > + * the acquire/release versions are not defined.
> > > + */
> > > +#ifndef	xchg_acquire
> > > +# define xchg_acquire(p, v)		xchg(p, v)
> > > +#endif
> > > +
> > > +#ifndef	smp_load_acquire
> > > +# define smp_load_acquire(p)				\
> > > +	({						\
> > > +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> > > +		smp_mb();				\
> > > +		__v;					\
> > > +	})
> > > +#endif
> > > +
> > > +#ifndef smp_store_release
> > > +# define smp_store_release(p, v)		\
> > > +	do {					\
> > > +		smp_mb();			\
> > > +		ACCESS_ONCE(*(p)) = v;		\
> > > +	} while (0)
> > > +#endif
> > 
> > PeterZ already has a series implementing acquire/release accessors, so you
> > should probably take a look at that rather than rolling your own here.
> 
> Yes, we are using Peter Z's implementation here.  The above is for anything
> where smp_load_acquire and smp_store_release are *not* defined.  We can
> remove this once all architectures implement the acquire and release 
> functions as mentioned in the comments of the patch.

Right, so you can use barrier.h and asm-generic will define generic versions
(identical to the above) for you if the architecture doesn't have an
optimised variant. You don't need to reproduce that in your .c file.

> > > +	/*
> > > +	 * Wait until the lock holder passes the lock down.
> > > +	 * Using smp_load_acquire() provides a memory barrier that
> > > +	 * ensures subsequent operations happen after the lock is acquired.
> > > +	 */
> > > +	while (!(smp_load_acquire(&node->locked)))
> > >  		arch_mutex_cpu_relax();
> > 
> > After a chat with some micro-architects, I'm going to have to disagree with
> > Paul here. For architectures where acquire/release are implemented with
> > explicit barriers (similarly for simple microarchitectures), emitting
> > barriers in a loop *is* going to have an affect on overall performance,
> > since those barriers may well result in traffic outside of the core (at
> > least, on ARM).
> > 
> > Thinking more about that, the real issue here is that arch_mutex_cpu_relax()
> > doesn't have a corresponding hook on the unlock side. On ARM, for example,
> > we can enter a low-power state using the wfe instruction, but that requires
> > the unlocker to wake up the core when the lock is released.
> 
> An alternate implementation is
> 	while (!ACCESS_ONCE(node->locked))
> 		arch_mutex_cpu_relax();
> 	smp_load_acquire(&node->locked);
> 
> Leaving the smp_load_acquire at the end to provide appropriate barrier.
> Will that be acceptable?

It still doesn't solve my problem though: I want a way to avoid that busy
loop in some architecture-specific manner. The arch_mutex_cpu_relax() hook
is a start, but there is no corresponding hook on the unlock side to issue a
wakeup. Given a sensible relax implementation, I don't have an issue with
putting a load-acquire in a loop, since it shouldn't be aggressively spinning
anymore.

Will
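
To make the wakeup idea concrete, an ARM-flavoured definition of the
hypothetical wait/wake hooks sketched earlier might look roughly like this
(illustrative only; real code would live in the arch headers and needs more
care than shown):

	#define arch_mcs_spin_wait(lp)	asm volatile("wfe" ::: "memory")	/* doze until an event */
	#define arch_mcs_spin_wake()	asm volatile("dsb ish\n\tsev" ::: "memory")	/* order the unlock store, then signal */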


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-12 16:08         ` Will Deacon
@ 2013-11-12 17:16           ` George Spelvin
  -1 siblings, 0 replies; 70+ messages in thread
From: George Spelvin @ 2013-11-12 17:16 UTC (permalink / raw)
  To: tim.c.chen, will.deacon
  Cc: a.p.zijlstra, aarcange, akpm, alex.shi, andi, arnd, aswin,
	dave.hansen, davidlohr.bueso, figo1802, hpa, linux-arch,
	linux-kernel, linux-mm, linux, matthew.r.wilcox, mingo, paulmck,
	peter, raghavendra.kt, riel, scott.norton, tglx, torvalds,
	waiman.long, walken

> On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote:
>> An alternate implementation is
>> 	while (!ACCESS_ONCE(node->locked))
>> 		arch_mutex_cpu_relax();
>> 	smp_load_acquire(&node->locked);
>> 
>> Leaving the smp_load_acquire at the end to provide appropriate barrier.
>> Will that be acceptable?

Will Deacon <will.deacon@arm.com> wrote:
> It still doesn't solve my problem though: I want a way to avoid that busy
> loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook
> is a start, but there is no corresponding hook on the unlock side to issue a
> wakeup. Given a sensible relax implementation, I don't have an issue with
> putting a load-acquire in a loop, since it shouldn't be aggressively spinning
> anymore.

So you want something like this?

/*
 * This is a spin-wait with acquire semantics.  That is, accesses after
 * this are not allowed to be reordered before the load that meets
 * the specified condition.  This requires that it end with either a
 * load-acquire or a full smp_mb().  The optimal way to do this is likely
 * to be architecture-dependent.  E.g. x86 MONITOR/MWAIT instructions.
 */
#ifndef smp_load_acquire_until
#define smp_load_acquire_until(addr, cond) \
	while (!(smp_load_acquire(addr) cond)) { \
		do { \
			arch_mutex_cpu_relax(); \
		} while (!(ACCESS_ONCE(*(addr)) cond)); \
	}
#endif

	smp_load_acquire_until(&node->locked, != 0);

Alternative implementations:

#define smp_load_acquire_until(addr, cond) { \
	while (!(ACCESS_ONCE(*(addr)) cond)) \
		arch_mutex_cpu_relax(); \
	smp_mb(); }

#define smp_load_acquire_until(addr, cond) \
	if (!(smp_load_acquire(addr) cond)) { \
		do { \
			arch_mutex_cpu_relax(); \
		} while (!(ACCESS_ONCE(*(addr)) cond)); \
		smp_mb(); \
	}
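
In the MCS slow path that would then read something like (sketch; the
helper above is only a proposal, not an existing primitive):

	ACCESS_ONCE(prev->next) = node;
	/* Wait, with acquire semantics, until the lock holder hands the lock down */
	smp_load_acquire_until(&node->locked, != 0);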

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-12 17:16           ` George Spelvin
@ 2013-11-13 17:37             ` Will Deacon
  -1 siblings, 0 replies; 70+ messages in thread
From: Will Deacon @ 2013-11-13 17:37 UTC (permalink / raw)
  To: George Spelvin
  Cc: tim.c.chen, a.p.zijlstra, aarcange, akpm, alex.shi, andi, arnd,
	aswin, dave.hansen, davidlohr.bueso, figo1802, hpa, linux-arch,
	linux-kernel, linux-mm, matthew.r.wilcox, mingo, paulmck, peter,
	raghavendra.kt, riel, scott.norton, tglx, torvalds, waiman.long,
	walken

On Tue, Nov 12, 2013 at 05:16:33PM +0000, George Spelvin wrote:
> > On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote:
> >> An alternate implementation is
> >> 	while (!ACCESS_ONCE(node->locked))
> >> 		arch_mutex_cpu_relax();
> >> 	smp_load_acquire(&node->locked);
> >> 
> >> Leaving the smp_load_acquire at the end to provide appropriate barrier.
> >> Will that be acceptable?
> 
> Will Deacon <will.deacon@arm.com> wrote:
> > It still doesn't solve my problem though: I want a way to avoid that busy
> > loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook
> > is a start, but there is no corresponding hook on the unlock side to issue a
> > wakeup. Given a sensible relax implementation, I don't have an issue with
> > putting a load-acquire in a loop, since it shouldn't be aggressively spinning
> > anymore.
> 
> So you want something like this?
> 
> /*
>  * This is a spin-wait with acquire semantics.  That is, accesses after
>  * this are not allowed to be reordered before the load that meets
>  * the specified condition.  This requires that it end with either a
>  * load-acquire or a full smp_mb().  The optimal way to do this is likely
>  * to be architecture-dependent.  E.g. x86 MONITOR/MWAIT instructions.
>  */
> #ifndef smp_load_acquire_until
> #define smp_load_acquire_until(addr, cond) \
> 	while (!(smp_load_acquire(addr) cond)) { \
> 		do { \
> 			arch_mutex_cpu_relax(); \
> 		} while (!(ACCESS_ONCE(*(addr)) cond)); \
> 	}
> #endif
> 
> 	smp_load_acquire_until(&node->locked, != 0);
> 
> Alternative implementations:
> 
> #define smp_load_acquire_until(addr, cond) { \
> 	while (!(ACCESS_ONCE(*(addr)) cond)) \
> 		arch_mutex_cpu_relax(); \
> 	smp_mb(); }
> 
> #define smp_load_acquire_until(addr, cond) \
> 	if (!(smp_load_acquire(addr) cond)) { \
> 		do { \
> 			arch_mutex_cpu_relax(); \
> 		} while (!(ACCESS_ONCE(*(addr)) cond)); \
> 		smp_mb(); \
> 	}

Not really...

To be clear: having the load-acquire in a loop is fine, provided that
arch_mutex_cpu_relax is something which causes the load to back off (you
mentioned the MONITOR/MWAIT instructions on x86).
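
On x86 that back-off could look roughly like the sketch below (glossing
over the feature checks and the hint encodings):

	while (!smp_load_acquire(&node->locked)) {
		__monitor(&node->locked, 0, 0);	/* arm the monitor on the word we spin on */
		if (!ACCESS_ONCE(node->locked))	/* re-check to avoid a missed wakeup */
			__mwait(0, 0);		/* doze until that cache line is written */
	}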

On ARM, our equivalent of those instructions also has a counterpart
instruction that needs to be executed by the CPU doing the unlock. That
means we can do one of two things:

	1. Add an arch hook in the unlock path to pair with the relax()
	   call on the lock path (arch_mutex_cpu_wake() ?).

	2. Move most of the code into arch_mcs_[un]lock, like we do for
	   spinlocks.

Whilst (1) would suffice, (2) would allow further optimisation on arm64,
where we can play tricks to avoid the explicit wakeup if we can control the
way in which the lock value is written.
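
A minimal sketch of option (1), with arch_mutex_cpu_wake() defaulting to
a no-op (the name is only a suggestion; nothing like it exists today):

#ifndef arch_mutex_cpu_wake
# define arch_mutex_cpu_wake()	do { } while (0)	/* most arches: spinners need no explicit wakeup */
#endif

static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
	struct mcs_spinlock *next = ACCESS_ONCE(node->next);

	if (likely(!next)) {
		if (likely(cmpxchg(lock, node, NULL) == node))
			return;
		/* Wait until the next pointer is set */
		while (!(next = ACCESS_ONCE(node->next)))
			arch_mutex_cpu_relax();
	}
	smp_store_release(&next->locked, 1);
	arch_mutex_cpu_wake();	/* e.g. dsb;sev on ARM, so a wfe-based relax() wakes up */
}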

Will


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file
  2013-11-08 19:51   ` Tim Chen
  (?)
@ 2013-11-19 19:10     ` Paul E. McKenney
  -1 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:10 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote:
> We will need the MCS lock code for doing optimistic spinning for rwsem
> and queue rwlock.  Extracting the MCS code from mutex.c and put into
> its own file allow us to reuse this code easily.
> 
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>

Please see comments below.

							Thanx, Paul

> ---
>  include/linux/mcs_spinlock.h |   64 ++++++++++++++++++++++++++++++++++++++++++
>  include/linux/mutex.h        |    5 ++-
>  kernel/locking/mutex.c       |   60 ++++----------------------------------
>  3 files changed, 74 insertions(+), 55 deletions(-)
>  create mode 100644 include/linux/mcs_spinlock.h
> 
> diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> new file mode 100644
> index 0000000..b5de3b0
> --- /dev/null
> +++ b/include/linux/mcs_spinlock.h
> @@ -0,0 +1,64 @@
> +/*
> + * MCS lock defines
> + *
> + * This file contains the main data structure and API definitions of MCS lock.
> + *
> + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
> + * with the desirable properties of being fair, and with each cpu trying
> + * to acquire the lock spinning on a local variable.
> + * It avoids expensive cache bouncings that common test-and-set spin-lock
> + * implementations incur.
> + */
> +#ifndef __LINUX_MCS_SPINLOCK_H
> +#define __LINUX_MCS_SPINLOCK_H
> +
> +struct mcs_spinlock {
> +	struct mcs_spinlock *next;
> +	int locked; /* 1 if lock acquired */
> +};
> +
> +/*
> + * We don't inline mcs_spin_lock() so that perf can correctly account for the
> + * time spent in this lock function.
> + */
> +static noinline
> +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> +{
> +	struct mcs_spinlock *prev;
> +
> +	/* Init node */
> +	node->locked = 0;
> +	node->next   = NULL;
> +
> +	prev = xchg(lock, node);

OK, the full memory barriers implied by xchg() ensure that *node will be
initialized before the "ACCESS_ONCE(prev->next) = node" below puts the
node into the list.  This rules out the misordering scenario that Tim
Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK>
on September 27th.

Assuming of course a corresponding barrier on the lock handoff side.
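
(Annotating the quoted hunk, just to spell the ordering out:

	node->locked = 0;		/* (A) initialise the node              */
	node->next   = NULL;
	prev = xchg(lock, node);	/* full barrier: orders (A) before (B)  */
	...
	ACCESS_ONCE(prev->next) = node;	/* (B) publish the node to the old tail */

so the unlocker cannot observe the node before (A) has completed.)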

> +	if (likely(prev == NULL)) {
> +		/* Lock acquired */
> +		node->locked = 1;
> +		return;
> +	}
> +	ACCESS_ONCE(prev->next) = node;
> +	smp_wmb();

I don't see what the above memory barrier does.  Here are some things
that it cannot be doing:

o	Ordering the insertion into the list above with the polling
	below.  First, smp_wmb() does not order prior writes against
	later reads, and second, misordering is harmless.  If we start
	polling before the insertion is complete, all that happens
	is that the first few polls have no chance of seeing a lock
	grant.

o	Ordering the polling against the initialization -- the above
	xchg() is already doing that for us.

So what is its purpose?

> +	/* Wait until the lock holder passes the lock down */
> +	while (!ACCESS_ONCE(node->locked))
> +		arch_mutex_cpu_relax();

On the other hand, I don't see how we get away without a barrier here.
As written, what prevents the caller's load from ->owner from being
reordered with the above load from ->locked?  (Perhaps you can argue
that such reordering is only a performance problem, but if so we need
that argument recorded in comments.)

Of course, if anyone ever tries to use mcs_spin_lock() as a full lock,
they will need a memory barrier here to prevent the critical section
from leaking out.

> +}
> +
> +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> +{
> +	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> +
> +	if (likely(!next)) {
> +		/*
> +		 * Release the lock by setting it to NULL
> +		 */
> +		if (cmpxchg(lock, node, NULL) == node)
> +			return;
> +		/* Wait until the next pointer is set */
> +		while (!(next = ACCESS_ONCE(node->next)))
> +			arch_mutex_cpu_relax();
> +	}

We need a memory barrier somewhere before here in this function,
otherwise the critical section can leak out.  I do not believe that
we can rely on the prohibition against speculative stores that Peter
Zijlstra and I have been discussing because that does not provide the
transitivity required by locking primitives.  I believe that we -could-
make the access below be an smp_store_release(), though.

Placing the barrier here (or at least not preceding the initial
fetch from node->next) has the advantage of allowing it to pair with
the xchg() in mcs_spin_lock(), though given the dependency only an
smp_read_barrier_depends() is required for that purpose.

> +	ACCESS_ONCE(next->locked) = 1;
> +	smp_wmb();

I don't see what this barrier does for us.  It is ordering the unlock
store with what, exactly?

If it really is doing something, we need a big fat comment stating what
that is, and checkpatch.pl will be happy to inform you.  ;-)

> +}
> +
> +#endif /* __LINUX_MCS_SPINLOCK_H */
> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> index bab49da..32a32e6 100644
> --- a/include/linux/mutex.h
> +++ b/include/linux/mutex.h
> @@ -46,6 +46,7 @@
>   * - detects multi-task circular deadlocks and prints out all affected
>   *   locks and tasks (and only those tasks)
>   */
> +struct mcs_spinlock;
>  struct mutex {
>  	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
>  	atomic_t		count;
> @@ -55,7 +56,7 @@ struct mutex {
>  	struct task_struct	*owner;
>  #endif
>  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> -	void			*spin_mlock;	/* Spinner MCS lock */
> +	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
>  #endif
>  #ifdef CONFIG_DEBUG_MUTEXES
>  	const char 		*name;
> @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
>  # define arch_mutex_cpu_relax() cpu_relax()
>  #endif
> 
> -#endif
> +#endif /* __LINUX_MUTEX_H */
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index d24105b..e08b183 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -25,6 +25,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/interrupt.h>
>  #include <linux/debug_locks.h>
> +#include <linux/mcs_spinlock.h>
> 
>  /*
>   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
>  	INIT_LIST_HEAD(&lock->wait_list);
>  	mutex_clear_owner(lock);
>  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> -	lock->spin_mlock = NULL;
> +	lock->mcs_lock = NULL;
>  #endif
> 
>  	debug_mutex_init(lock, name, key);
> @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
>   * more or less simultaneously, the spinners need to acquire a MCS lock
>   * first before spinning on the owner field.
>   *
> - * We don't inline mspin_lock() so that perf can correctly account for the
> - * time spent in this lock function.
>   */
> -struct mspin_node {
> -	struct mspin_node *next ;
> -	int		  locked;	/* 1 if lock acquired */
> -};
> -#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
> -
> -static noinline
> -void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
> -{
> -	struct mspin_node *prev;
> -
> -	/* Init node */
> -	node->locked = 0;
> -	node->next   = NULL;
> -
> -	prev = xchg(lock, node);
> -	if (likely(prev == NULL)) {
> -		/* Lock acquired */
> -		node->locked = 1;
> -		return;
> -	}
> -	ACCESS_ONCE(prev->next) = node;
> -	smp_wmb();
> -	/* Wait until the lock holder passes the lock down */
> -	while (!ACCESS_ONCE(node->locked))
> -		arch_mutex_cpu_relax();
> -}
> -
> -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
> -{
> -	struct mspin_node *next = ACCESS_ONCE(node->next);
> -
> -	if (likely(!next)) {
> -		/*
> -		 * Release the lock by setting it to NULL
> -		 */
> -		if (cmpxchg(lock, node, NULL) == node)
> -			return;
> -		/* Wait until the next pointer is set */
> -		while (!(next = ACCESS_ONCE(node->next)))
> -			arch_mutex_cpu_relax();
> -	}
> -	ACCESS_ONCE(next->locked) = 1;
> -	smp_wmb();
> -}
> 
>  /*
>   * Mutex spinning code migrated from kernel/sched/core.c
> @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> 
>  	for (;;) {
>  		struct task_struct *owner;
> -		struct mspin_node  node;
> +		struct mcs_spinlock  node;
> 
>  		if (use_ww_ctx && ww_ctx->acquired > 0) {
>  			struct ww_mutex *ww;
> @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		 * If there's an owner, wait for it to either
>  		 * release the lock or go to sleep.
>  		 */
> -		mspin_lock(MLOCK(lock), &node);
> +		mcs_spin_lock(&lock->mcs_lock, &node);
>  		owner = ACCESS_ONCE(lock->owner);
>  		if (owner && !mutex_spin_on_owner(lock, owner)) {
> -			mspin_unlock(MLOCK(lock), &node);
> +			mcs_spin_unlock(&lock->mcs_lock, &node);
>  			goto slowpath;
>  		}
> 
> @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  			}
> 
>  			mutex_set_owner(lock);
> -			mspin_unlock(MLOCK(lock), &node);
> +			mcs_spin_unlock(&lock->mcs_lock, &node);
>  			preempt_enable();
>  			return 0;
>  		}
> -		mspin_unlock(MLOCK(lock), &node);
> +		mcs_spin_unlock(&lock->mcs_lock, &node);
> 
>  		/*
>  		 * When there's no owner, we might have preempted between the
> -- 
> 1.7.4.4
> 
> 
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments
  2013-11-08 19:52   ` Tim Chen
  (?)
@ 2013-11-19 19:13     ` Paul E. McKenney
  -1 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:13 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote:
> From: Jason Low <jason.low2@hp.com>
> 
> Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node
> check in mcs_spin_unlock() likely() as it is likely that a race did not occur
> most of the time.
> 
> Also add in more comments describing how the local node is used in MCS locks.
> 
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> Signed-off-by: Jason Low <jason.low2@hp.com>
> ---
>  include/linux/mcs_spinlock.h |   13 +++++++++++--
>  1 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> index b5de3b0..96f14299 100644
> --- a/include/linux/mcs_spinlock.h
> +++ b/include/linux/mcs_spinlock.h
> @@ -18,6 +18,12 @@ struct mcs_spinlock {
>  };
> 
>  /*
> + * In order to acquire the lock, the caller should declare a local node and
> + * pass a reference of the node to this function in addition to the lock.
> + * If the lock has already been acquired, then this will proceed to spin
> + * on this node->locked until the previous lock holder sets the node->locked
> + * in mcs_spin_unlock().
> + *
>   * We don't inline mcs_spin_lock() so that perf can correctly account for the
>   * time spent in this lock function.
>   */
> @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  	prev = xchg(lock, node);
>  	if (likely(prev == NULL)) {
>  		/* Lock acquired */
> -		node->locked = 1;

Agreed, no one looks at this field in this case, so no need to initialize
it, unless for debug purposes.

>  		return;
>  	}
>  	ACCESS_ONCE(prev->next) = node;
> @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  		arch_mutex_cpu_relax();
>  }
> 
> +/*
> + * Releases the lock. The caller should pass in the corresponding node that
> + * was used to acquire the lock.
> + */
>  static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  {
>  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
>  		/*
>  		 * Release the lock by setting it to NULL
>  		 */
> -		if (cmpxchg(lock, node, NULL) == node)
> +		if (likely(cmpxchg(lock, node, NULL) == node))

Agreed here as well.  Takes a narrow race to hit this.

So, did your testing exercise this path?  If the answer is "yes", and
if the issues that I called out in patch #1 are resolved:

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

>  			return;
>  		/* Wait until the next pointer is set */
>  		while (!(next = ACCESS_ONCE(node->next)))
> -- 
> 1.7.4.4
> 
> 
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file
  2013-11-08 19:52   ` Tim Chen
  (?)
@ 2013-11-19 19:15     ` Paul E. McKenney
  -1 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:15 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Fri, Nov 08, 2013 at 11:52:15AM -0800, Tim Chen wrote:
> From: Waiman Long <Waiman.Long@hp.com>
> 
> The following changes are made:
> 
> 1) Create a new mcs_spinlock.c file to contain the
>    mcs_spin_lock() and mcs_spin_unlock() functions.
> 2) Include a number of prerequisite header files and define
>    arch_mutex_cpu_relax(), if not previously defined so the
>    mcs functions can be compiled for multiple architectures without
>    causing problems.
> 
> Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>

Assuming issues called out in patch #1 are resolved and testing called
out for patch #2 has happened:

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> ---
>  include/linux/mcs_spinlock.h                       |   56 ++------------------
>  kernel/locking/Makefile                            |    6 +-
>  .../locking/mcs_spinlock.c                         |   31 ++++++-----
>  3 files changed, 23 insertions(+), 70 deletions(-)
>  copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%)
> 
> diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> index 96f14299..d54bb23 100644
> --- a/include/linux/mcs_spinlock.h
> +++ b/include/linux/mcs_spinlock.h
> @@ -17,57 +17,9 @@ struct mcs_spinlock {
>  	int locked; /* 1 if lock acquired */
>  };
> 
> -/*
> - * In order to acquire the lock, the caller should declare a local node and
> - * pass a reference of the node to this function in addition to the lock.
> - * If the lock has already been acquired, then this will proceed to spin
> - * on this node->locked until the previous lock holder sets the node->locked
> - * in mcs_spin_unlock().
> - *
> - * We don't inline mcs_spin_lock() so that perf can correctly account for the
> - * time spent in this lock function.
> - */
> -static noinline
> -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> -{
> -	struct mcs_spinlock *prev;
> -
> -	/* Init node */
> -	node->locked = 0;
> -	node->next   = NULL;
> -
> -	prev = xchg(lock, node);
> -	if (likely(prev == NULL)) {
> -		/* Lock acquired */
> -		return;
> -	}
> -	ACCESS_ONCE(prev->next) = node;
> -	smp_wmb();
> -	/* Wait until the lock holder passes the lock down */
> -	while (!ACCESS_ONCE(node->locked))
> -		arch_mutex_cpu_relax();
> -}
> -
> -/*
> - * Releases the lock. The caller should pass in the corresponding node that
> - * was used to acquire the lock.
> - */
> -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> -{
> -	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> -
> -	if (likely(!next)) {
> -		/*
> -		 * Release the lock by setting it to NULL
> -		 */
> -		if (likely(cmpxchg(lock, node, NULL) == node))
> -			return;
> -		/* Wait until the next pointer is set */
> -		while (!(next = ACCESS_ONCE(node->next)))
> -			arch_mutex_cpu_relax();
> -	}
> -	ACCESS_ONCE(next->locked) = 1;
> -	smp_wmb();
> -}
> +extern
> +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
> +extern
> +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
> 
>  #endif /* __LINUX_MCS_SPINLOCK_H */
> diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
> index baab8e5..20d9d5c 100644
> --- a/kernel/locking/Makefile
> +++ b/kernel/locking/Makefile
> @@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o
>  ifeq ($(CONFIG_PROC_FS),y)
>  obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
>  endif
> -obj-$(CONFIG_SMP) += spinlock.o
> -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
> +obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o
> +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o
>  obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
>  obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
>  obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
> -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
> +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o
>  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
>  obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
>  obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
> diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c
> similarity index 78%
> copy from include/linux/mcs_spinlock.h
> copy to kernel/locking/mcs_spinlock.c
> index 96f14299..b6f27f8 100644
> --- a/include/linux/mcs_spinlock.h
> +++ b/kernel/locking/mcs_spinlock.c
> @@ -1,7 +1,5 @@
>  /*
> - * MCS lock defines
> - *
> - * This file contains the main data structure and API definitions of MCS lock.
> + * MCS lock
>   *
>   * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
>   * with the desirable properties of being fair, and with each cpu trying
> @@ -9,13 +7,20 @@
>   * It avoids expensive cache bouncings that common test-and-set spin-lock
>   * implementations incur.
>   */
> -#ifndef __LINUX_MCS_SPINLOCK_H
> -#define __LINUX_MCS_SPINLOCK_H
> +/*
> + * asm/processor.h may define arch_mutex_cpu_relax().
> + * If it is not defined, cpu_relax() will be used.
> + */
> +#include <asm/barrier.h>
> +#include <asm/cmpxchg.h>
> +#include <asm/processor.h>
> +#include <linux/compiler.h>
> +#include <linux/mcs_spinlock.h>
> +#include <linux/export.h>
> 
> -struct mcs_spinlock {
> -	struct mcs_spinlock *next;
> -	int locked; /* 1 if lock acquired */
> -};
> +#ifndef arch_mutex_cpu_relax
> +# define arch_mutex_cpu_relax() cpu_relax()
> +#endif
> 
>  /*
>   * In order to acquire the lock, the caller should declare a local node and
> @@ -23,11 +28,7 @@ struct mcs_spinlock {
>   * If the lock has already been acquired, then this will proceed to spin
>   * on this node->locked until the previous lock holder sets the node->locked
>   * in mcs_spin_unlock().
> - *
> - * We don't inline mcs_spin_lock() so that perf can correctly account for the
> - * time spent in this lock function.
>   */
> -static noinline
>  void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  {
>  	struct mcs_spinlock *prev;
> @@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  	while (!ACCESS_ONCE(node->locked))
>  		arch_mutex_cpu_relax();
>  }
> +EXPORT_SYMBOL_GPL(mcs_spin_lock);
> 
>  /*
>   * Releases the lock. The caller should pass in the corresponding node that
> @@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
>  	ACCESS_ONCE(next->locked) = 1;
>  	smp_wmb();
>  }
> -
> -#endif /* __LINUX_MCS_SPINLOCK_H */
> +EXPORT_SYMBOL_GPL(mcs_spin_unlock);
> -- 
> 1.7.4.4
> 
> 
> 
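As the quoted comments note, a caller is expected to declare its own queue node and pass it to both functions together with the lock.  A minimal usage sketch (illustrative only; the lock and function names around the MCS calls are invented):

        struct mcs_spinlock *example_lock;      /* shared tail pointer, NULL when free */

        void example_critical_section(void)
        {
                struct mcs_spinlock node;       /* per-caller node, lives on this stack */

                mcs_spin_lock(&example_lock, &node);
                /* ... critical section ... */
                mcs_spin_unlock(&example_lock, &node);
        }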


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-08 19:52   ` Tim Chen
  (?)
@ 2013-11-19 19:21     ` Paul E. McKenney
  -1 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:21 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote:
> From: Waiman Long <Waiman.Long@hp.com>
> 
> This patch corrects the way memory barriers are used in the MCS lock
> with the smp_load_acquire and smp_store_release functions.
> It removes the ones that are not needed.
> 
> It uses architecture specific load-acquire and store-release
> primitives for synchronization, if available. Generic implementations
> are provided in case they are not defined even though they may not
> be optimal. These generic implementations could be removed later on
> once changes are made in all the relevant header files.
> 
> Suggested-by: Michel Lespinasse <walken@google.com>
> Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> Signed-off-by: Jason Low <jason.low2@hp.com>
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>

Please see comments below.

							Thanx, Paul

> ---
>  kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
>  1 files changed, 41 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> index b6f27f8..df5c167 100644
> --- a/kernel/locking/mcs_spinlock.c
> +++ b/kernel/locking/mcs_spinlock.c
> @@ -23,6 +23,31 @@
>  #endif
> 
>  /*
> + * Fall back to use the regular atomic operations and memory barrier if
> + * the acquire/release versions are not defined.
> + */
> +#ifndef	xchg_acquire
> +# define xchg_acquire(p, v)		xchg(p, v)
> +#endif
> +
> +#ifndef	smp_load_acquire
> +# define smp_load_acquire(p)				\
> +	({						\
> +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> +		smp_mb();				\
> +		__v;					\
> +	})
> +#endif
> +
> +#ifndef smp_store_release
> +# define smp_store_release(p, v)		\
> +	do {					\
> +		smp_mb();			\
> +		ACCESS_ONCE(*(p)) = v;		\
> +	} while (0)
> +#endif
> +
> +/*
>   * In order to acquire the lock, the caller should declare a local node and
>   * pass a reference of the node to this function in addition to the lock.
>   * If the lock has already been acquired, then this will proceed to spin
> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  	node->locked = 0;
>  	node->next   = NULL;
> 
> -	prev = xchg(lock, node);
> +	/* xchg() provides a memory barrier */
> +	prev = xchg_acquire(lock, node);

But if this is xchg_acquire() with only acquire semantics, it need not
ensure that the initializations of node->locked and node->next above
will happen before the "ACCESS_ONCE(prev->next) = node" below.  This
therefore needs to remain xchg().  Or you need an smp_store_release()
below instead of an ACCESS_ONCE() assignment.

As currently written, the poor CPU doing the unlock can be fatally
disappointed by seeing pre-initialized values of ->locked and ->next.
This could, among other things, result in a hang where the handoff
happens before the initialization.
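Spelled out, Paul's second alternative replaces the plain linking store with a release store, so that the node initialization above cannot be reordered past the point where the node becomes visible to its predecessor.  A sketch only, reusing the fallback definitions quoted above, and not necessarily the complete fix that was eventually applied:

        prev = xchg_acquire(lock, node);
        if (likely(prev == NULL)) {
                /* Lock acquired */
                return;
        }
        /* Release: publish the node->locked/node->next init before linking in. */
        smp_store_release(&prev->next, node);
        /* Wait until the lock holder passes the lock down. */
        while (!(smp_load_acquire(&node->locked)))
                arch_mutex_cpu_relax();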

>  	if (likely(prev == NULL)) {
>  		/* Lock acquired */
>  		return;
>  	}
>  	ACCESS_ONCE(prev->next) = node;
> -	smp_wmb();
> -	/* Wait until the lock holder passes the lock down */
> -	while (!ACCESS_ONCE(node->locked))
> +	/*
> +	 * Wait until the lock holder passes the lock down.
> +	 * Using smp_load_acquire() provides a memory barrier that
> +	 * ensures subsequent operations happen after the lock is acquired.
> +	 */
> +	while (!(smp_load_acquire(&node->locked)))
>  		arch_mutex_cpu_relax();

OK, this smp_load_acquire() makes sense!

>  }
>  EXPORT_SYMBOL_GPL(mcs_spin_lock);
> @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock);
>   * Releases the lock. The caller should pass in the corresponding node that
>   * was used to acquire the lock.
>   */
> -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  {
>  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> 
> @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
>  		while (!(next = ACCESS_ONCE(node->next)))
>  			arch_mutex_cpu_relax();
>  	}
> -	ACCESS_ONCE(next->locked) = 1;
> -	smp_wmb();
> +	/*
> +	 * Pass lock to next waiter.
> +	 * smp_store_release() provides a memory barrier to ensure
> +	 * all operations in the critical section has been completed
> +	 * before unlocking.
> +	 */
> +	smp_store_release(&next->locked , 1);

This smp_store_release() makes sense as well!

Could you please get rid of the extraneous space before the comma?

>  }
>  EXPORT_SYMBOL_GPL(mcs_spin_unlock);
> -- 
> 1.7.4.4
> 
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-11 18:20       ` Peter Zijlstra
@ 2013-11-19 19:23         ` Paul E. McKenney
  -1 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:23 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Will Deacon, Tim Chen, Ingo Molnar, Andrew Morton,
	Thomas Gleixner, linux-kernel, linux-mm, linux-arch,
	Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi,
	Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox,
	Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra

On Mon, Nov 11, 2013 at 07:20:59PM +0100, Peter Zijlstra wrote:
> On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote:
> > > +	/*
> > > +	 * Wait until the lock holder passes the lock down.
> > > +	 * Using smp_load_acquire() provides a memory barrier that
> > > +	 * ensures subsequent operations happen after the lock is acquired.
> > > +	 */
> > > +	while (!(smp_load_acquire(&node->locked)))
> > >  		arch_mutex_cpu_relax();
> 
> > Thinking more about that, the real issue here is that arch_mutex_cpu_relax()
> > doesn't have a corresponding hook on the unlock side. On ARM, for example,
> > we can enter a low-power state using the wfe instruction, but that requires
> > the unlocker to wake up the core when the lock is released.
> 
> That said, it would be ever so awesome if we could come to some sort of
> conclusion on control dependencies here.
> 
> I _know_ C/C++ doesn't do them, but at the end of the day the compiler
> still generates ASM and as long as we're relatively certain there's a
> loop there (there has to be, right? :-), we could maybe rely on it
> anyway.

I believe that we will be able to rely on control dependencies leading
to stores (not loads!), but even then they are not guaranteed to provide
transitivity.  This lack of transitivity sounds problematic for locking
primitives.
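For readers following along, a control dependency is the ordering a CPU gives a store that is conditional on an earlier load.  A generic sketch of the pattern under discussion (the variable names are invented, and this is not code from the series):

        while (!ACCESS_ONCE(node->locked))      /* load feeding the branch */
                arch_mutex_cpu_relax();
        /*
         * A store that executes only if the loop exits: the CPU must resolve
         * the terminating load before this store can become visible.  Later
         * loads are not ordered by it, and no transitivity is guaranteed.
         */
        ACCESS_ONCE(flag) = 1;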

							Thanx, Paul


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-11 18:10     ` Will Deacon
@ 2013-11-19 19:26       ` Paul E. McKenney
  -1 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:26 UTC (permalink / raw)
  To: Will Deacon
  Cc: Tim Chen, Ingo Molnar, Andrew Morton, Thomas Gleixner,
	linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley

On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote:
> Hello,
> 
> On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote:
> > From: Waiman Long <Waiman.Long@hp.com>
> > 
> > This patch corrects the way memory barriers are used in the MCS lock
> > with smp_load_acquire and smp_store_release fucnction.
> > It removes ones that are not needed.
> > 
> > It uses architecture specific load-acquire and store-release
> > primitives for synchronization, if available. Generic implementations
> > are provided in case they are not defined even though they may not
> > be optimal. These generic implementation could be removed later on
> > once changes are made in all the relevant header files.
> > 
> > Suggested-by: Michel Lespinasse <walken@google.com>
> > Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> > Signed-off-by: Jason Low <jason.low2@hp.com>
> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> > ---
> >  kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
> >  1 files changed, 41 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> > index b6f27f8..df5c167 100644
> > --- a/kernel/locking/mcs_spinlock.c
> > +++ b/kernel/locking/mcs_spinlock.c
> > @@ -23,6 +23,31 @@
> >  #endif
> >  
> >  /*
> > + * Fall back to use the regular atomic operations and memory barrier if
> > + * the acquire/release versions are not defined.
> > + */
> > +#ifndef	xchg_acquire
> > +# define xchg_acquire(p, v)		xchg(p, v)
> > +#endif
> > +
> > +#ifndef	smp_load_acquire
> > +# define smp_load_acquire(p)				\
> > +	({						\
> > +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> > +		smp_mb();				\
> > +		__v;					\
> > +	})
> > +#endif
> > +
> > +#ifndef smp_store_release
> > +# define smp_store_release(p, v)		\
> > +	do {					\
> > +		smp_mb();			\
> > +		ACCESS_ONCE(*(p)) = v;		\
> > +	} while (0)
> > +#endif
> 
> PeterZ already has a series implementing acquire/release accessors, so you
> should probably take a look at that rather than rolling your own here.
> 
> You could then augment that with [cmp]xchg_{acquire,release} as
> appropriate.
> 
> > +/*
> >   * In order to acquire the lock, the caller should declare a local node and
> >   * pass a reference of the node to this function in addition to the lock.
> >   * If the lock has already been acquired, then this will proceed to spin
> > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  	node->locked = 0;
> >  	node->next   = NULL;
> >  
> > -	prev = xchg(lock, node);
> > +	/* xchg() provides a memory barrier */
> > +	prev = xchg_acquire(lock, node);
> >  	if (likely(prev == NULL)) {
> >  		/* Lock acquired */
> >  		return;
> >  	}
> >  	ACCESS_ONCE(prev->next) = node;
> > -	smp_wmb();
> > -	/* Wait until the lock holder passes the lock down */
> > -	while (!ACCESS_ONCE(node->locked))
> > +	/*
> > +	 * Wait until the lock holder passes the lock down.
> > +	 * Using smp_load_acquire() provides a memory barrier that
> > +	 * ensures subsequent operations happen after the lock is acquired.
> > +	 */
> > +	while (!(smp_load_acquire(&node->locked)))
> >  		arch_mutex_cpu_relax();
> 
> After a chat with some micro-architects, I'm going to have to disagree with
> Paul here. For architectures where acquire/release are implemented with
> explicit barriers (similarly for simple microarchitectures), emitting
> barriers in a loop *is* going to have an effect on overall performance,
> since those barriers may well result in traffic outside of the core (at
> least, on ARM).

OK, that is a new one on me...

> Thinking more about that, the real issue here is that arch_mutex_cpu_relax()
> doesn't have a corresponding hook on the unlock side. On ARM, for example,
> we can enter a low-power state using the wfe instruction, but that requires
> the unlocker to wake up the core when the lock is released.
> 
> So, although I'm completely in favour of introducing acquire/release
> accessors, I really think the mcs locking routines would benefit from
> some arch-specific backend code, even if it's optional (although I would
> imagine most architectures implementing something to improve power and/or
> performance).

I would argue that if we have so much lock contention that spinners want
to enter low-power states, other fixes are in order.  Especially given
relatively small CPU counts.

							Thanx, Paul
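In concrete terms, Will's suggestion is to give the waiter and the lock hand-off optional arch hooks, so that ARM, for example, could use wfe on the waiting side and wake the core from the unlock side.  The hook names below are invented for illustration; nothing in this series defines them:

        #ifndef arch_mcs_spin_wait
        /* Default waiter: plain busy-wait, as in the code quoted above. */
        # define arch_mcs_spin_wait(l)                          \
        do {                                                    \
                while (!(smp_load_acquire(l)))                  \
                        arch_mutex_cpu_relax();                 \
        } while (0)
        #endif

        #ifndef arch_mcs_spin_wake
        /* Default wake: the release store alone hands the lock off. */
        # define arch_mcs_spin_wake(l)  smp_store_release((l), 1)
        #endif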


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-12  1:57         ` Waiman Long
@ 2013-11-19 19:32           ` Paul E. McKenney
  -1 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:32 UTC (permalink / raw)
  To: Waiman Long
  Cc: Tim Chen, Peter Zijlstra, Will Deacon, Ingo Molnar,
	Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm,
	linux-arch, Linus Torvalds, Andrea Arcangeli, Alex Shi,
	Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox,
	Dave Hansen, Rik van Riel, Peter Hurley

On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote:
> On 11/11/2013 04:17 PM, Tim Chen wrote:
> >>You could then augment that with [cmp]xchg_{acquire,release} as
> >>appropriate.
> >>
> >>>+/*
> >>>   * In order to acquire the lock, the caller should declare a local node and
> >>>   * pass a reference of the node to this function in addition to the lock.
> >>>   * If the lock has already been acquired, then this will proceed to spin
> >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >>>  	node->locked = 0;
> >>>  	node->next   = NULL;
> >>>
> >>>-	prev = xchg(lock, node);
> >>>+	/* xchg() provides a memory barrier */
> >>>+	prev = xchg_acquire(lock, node);
> >>>  	if (likely(prev == NULL)) {
> >>>  		/* Lock acquired */
> >>>  		return;
> >>>  	}
> >>>  	ACCESS_ONCE(prev->next) = node;
> >>>-	smp_wmb();
> >>>-	/* Wait until the lock holder passes the lock down */
> >>>-	while (!ACCESS_ONCE(node->locked))
> >>>+	/*
> >>>+	 * Wait until the lock holder passes the lock down.
> >>>+	 * Using smp_load_acquire() provides a memory barrier that
> >>>+	 * ensures subsequent operations happen after the lock is acquired.
> >>>+	 */
> >>>+	while (!(smp_load_acquire(&node->locked)))
> >>>  		arch_mutex_cpu_relax();
> >An alternate implementation is
> >	while (!ACCESS_ONCE(node->locked))
> >		arch_mutex_cpu_relax();
> >	smp_load_acquire(&node->locked);
> >
> >Leaving the smp_load_acquire at the end to provide appropriate barrier.
> >Will that be acceptable?
> >
> >Tim
> 
> I second Tim's opinion. It would be helpful to have a
> smp_mb_load_acquire() function that provides a memory barrier with
> load-acquire semantics. I don't think we need one for store-release
> as that will not be in a loop.

Hmmm...  I guess the ACCESS_ONCE() in the smp_load_acquire() should
prevent it from being optimized away.  But yes, you then end up with
an extra load on the critical lock hand-off path.  And something
like an smp_mb_acquire() could then be useful, although I believe
that on all current hardware smp_mb_acquire() emits the same code
as would an smp_mb_release():

o	barrier() on TSO systems such as x86 and s390.

o	lwsync instruction on powerpc.  (Really old systems would
	want a different instruction for smp_mb_acquire(), but let's
	not optimize for really old systems.)

o	dmb instruction on ARM.

o	mf instruction on ia64.

So how about an smp_mb_acquire_release() to cover both use cases?
This could be used to further optimize circular buffers, for example.
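The pattern Tim and Waiman are converging on spins with plain loads and pays for the acquire ordering only once, after the loop.  Neither smp_mb_acquire() nor smp_mb_load_acquire() exists at this point, so the sketch below is purely hypothetical:

        /* Spin cheaply; the waiting loads need no ordering of their own. */
        while (!ACCESS_ONCE(node->locked))
                arch_mutex_cpu_relax();
        /* Hypothetical: one acquire barrier after the final observed store. */
        smp_mb_acquire();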

							Thanx, Paul

> Peter, what do you think about adding that to your patch?
> 
> -Longman
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file
  2013-11-19 19:10     ` Paul E. McKenney
  (?)
@ 2013-11-19 19:42       ` Tim Chen
  -1 siblings, 0 replies; 70+ messages in thread
From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote:
> > We will need the MCS lock code for doing optimistic spinning for rwsem
> > and queue rwlock.  Extracting the MCS code from mutex.c and putting it into
> > its own file allows us to reuse this code easily.
> > 
> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
> 
> Please see comments below.
> 

Thanks for reviewing the code.

> 							Thanx, Paul
> 
> > ---
> >  include/linux/mcs_spinlock.h |   64 ++++++++++++++++++++++++++++++++++++++++++
> >  include/linux/mutex.h        |    5 ++-
> >  kernel/locking/mutex.c       |   60 ++++----------------------------------
> >  3 files changed, 74 insertions(+), 55 deletions(-)
> >  create mode 100644 include/linux/mcs_spinlock.h
> > 
> > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> > new file mode 100644
> > index 0000000..b5de3b0
> > --- /dev/null
> > +++ b/include/linux/mcs_spinlock.h
> > @@ -0,0 +1,64 @@
> > +/*
> > + * MCS lock defines
> > + *
> > + * This file contains the main data structure and API definitions of MCS lock.
> > + *
> > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
> > + * with the desirable properties of being fair, and with each cpu trying
> > + * to acquire the lock spinning on a local variable.
> > + * It avoids expensive cache bouncings that common test-and-set spin-lock
> > + * implementations incur.
> > + */
> > +#ifndef __LINUX_MCS_SPINLOCK_H
> > +#define __LINUX_MCS_SPINLOCK_H
> > +
> > +struct mcs_spinlock {
> > +	struct mcs_spinlock *next;
> > +	int locked; /* 1 if lock acquired */
> > +};
> > +
> > +/*
> > + * We don't inline mcs_spin_lock() so that perf can correctly account for the
> > + * time spent in this lock function.
> > + */
> > +static noinline
> > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > +{
> > +	struct mcs_spinlock *prev;
> > +
> > +	/* Init node */
> > +	node->locked = 0;
> > +	node->next   = NULL;
> > +
> > +	prev = xchg(lock, node);
> 
> OK, the full memory barriers implied by xchg() ensure that *node will be
> initialized before the "ACCESS_ONCE(prev->next) = node" below puts the
> node into the list.  This rules out the misordering scenario that Tim
> Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK>
> on September 27th.
> 
> Assuming of course a corresponding barrier on the lock handoff side.
> 
> > +	if (likely(prev == NULL)) {
> > +		/* Lock acquired */
> > +		node->locked = 1;
> > +		return;
> > +	}
> > +	ACCESS_ONCE(prev->next) = node;
> > +	smp_wmb();
> 
> I don't see what the above memory barrier does.  Here are some things
> that it cannot be doing:
> 
> o	Ordering the insertion into the list above with the polling
> 	below.  First, smp_wmb() does not order prior writes against
> 	later reads, and second misordering is harmless.  If we start
> 	polling before the insertion is complete, all that happens
> 	is that the first few polls have no chance of seeing a lock
> 	grant.
> 
> o	Ordering the polling against the initialization -- the above
> 	xchg() is already doing that for us.
> 
> So what is its purpose?

Agreed that the smp_wmb() is not needed.  It is in the existing MCS code
residing in mutex.c; here we are only re-factoring the code and have not
yet corrected the memory barrier.

That particular smp_wmb() is removed in Patch 4/4, which corrects the
memory barriers.

> 
> > +	/* Wait until the lock holder passes the lock down */
> > +	while (!ACCESS_ONCE(node->locked))
> > +		arch_mutex_cpu_relax();
> 
> On the other hand, I don't see how we get away without a barrier here.
> As written, what prevents the caller's load from ->owner from being
> reordered with the above load from ->locked?  (Perhaps you can argue
> that such reordering is only a performance problem, but if so we need
> that argument recorded in comments.)
> 
> Of course, if anyone ever tries to use mcs_spin_lock() as a full lock,
> they will need a memory barrier here to prevent the critical section
> from leaking out.

Agreed.  The appropriate memory barrier is added in Patch 4/4.
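
For concreteness, the caller-side ordering at issue looks roughly like the
sketch below, modelled on the mutex optimistic-spinning path quoted further
down (the function name is illustrative only, not part of the patch):

/*
 * Sketch only.  Without acquire semantics on the node->locked load inside
 * mcs_spin_lock(), the load of lock->owner here could be speculated before
 * the lock hand-off is actually observed.
 */
static bool mutex_try_optimistic_spin(struct mutex *lock,
				      struct mcs_spinlock *node)
{
	struct task_struct *owner;

	mcs_spin_lock(&lock->mcs_lock, node);	/* needs acquire ordering */
	owner = ACCESS_ONCE(lock->owner);	/* must stay after the
						 * node->locked load above */
	if (owner && !mutex_spin_on_owner(lock, owner)) {
		mcs_spin_unlock(&lock->mcs_lock, node);
		return false;
	}
	mcs_spin_unlock(&lock->mcs_lock, node);
	return true;
}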

> 
> > +}
> > +
> > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > +{
> > +	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > +
> > +	if (likely(!next)) {
> > +		/*
> > +		 * Release the lock by setting it to NULL
> > +		 */
> > +		if (cmpxchg(lock, node, NULL) == node)
> > +			return;
> > +		/* Wait until the next pointer is set */
> > +		while (!(next = ACCESS_ONCE(node->next)))
> > +			arch_mutex_cpu_relax();
> > +	}
> 
> We need a memory barrier somewhere before here in this function,
> otherwise the critical section can leak out.  I do not believe that
> we can rely on the prohibition against speculative stores that Peter
> Zijlstra and I have been discussing because that does not provide the
> transitivity required by locking primitives.  I believe that we -could-
> make the access below be an smp_store_release(), though.
> 
> Placing the barrier here (or at least not preceding the initial
> fetch from node->next) has the advantage of allowing it to pair with
> the xchg() in mcs_spin_lock(), though given the dependency only an
> smp_read_barrier_depends() is required for that purpose.
> 
> > +	ACCESS_ONCE(next->locked) = 1;
> > +	smp_wmb();
> 
> I don't see what this barrier does for us.  It is ordering the unlock
> store with what, exactly?
> 
> If it really is doing something, we need a big fat comment stating what
> that is, and checkpatch.pl will be happy to inform you.  ;-)
> 
> > +}
> > +
> > +#endif /* __LINUX_MCS_SPINLOCK_H */
> > diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> > index bab49da..32a32e6 100644
> > --- a/include/linux/mutex.h
> > +++ b/include/linux/mutex.h
> > @@ -46,6 +46,7 @@
> >   * - detects multi-task circular deadlocks and prints out all affected
> >   *   locks and tasks (and only those tasks)
> >   */
> > +struct mcs_spinlock;
> >  struct mutex {
> >  	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
> >  	atomic_t		count;
> > @@ -55,7 +56,7 @@ struct mutex {
> >  	struct task_struct	*owner;
> >  #endif
> >  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > -	void			*spin_mlock;	/* Spinner MCS lock */
> > +	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
> >  #endif
> >  #ifdef CONFIG_DEBUG_MUTEXES
> >  	const char 		*name;
> > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
> >  # define arch_mutex_cpu_relax() cpu_relax()
> >  #endif
> > 
> > -#endif
> > +#endif /* __LINUX_MUTEX_H */
> > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> > index d24105b..e08b183 100644
> > --- a/kernel/locking/mutex.c
> > +++ b/kernel/locking/mutex.c
> > @@ -25,6 +25,7 @@
> >  #include <linux/spinlock.h>
> >  #include <linux/interrupt.h>
> >  #include <linux/debug_locks.h>
> > +#include <linux/mcs_spinlock.h>
> > 
> >  /*
> >   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
> >  	INIT_LIST_HEAD(&lock->wait_list);
> >  	mutex_clear_owner(lock);
> >  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > -	lock->spin_mlock = NULL;
> > +	lock->mcs_lock = NULL;
> >  #endif
> > 
> >  	debug_mutex_init(lock, name, key);
> > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
> >   * more or less simultaneously, the spinners need to acquire a MCS lock
> >   * first before spinning on the owner field.
> >   *
> > - * We don't inline mspin_lock() so that perf can correctly account for the
> > - * time spent in this lock function.
> >   */
> > -struct mspin_node {
> > -	struct mspin_node *next ;
> > -	int		  locked;	/* 1 if lock acquired */
> > -};
> > -#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
> > -
> > -static noinline
> > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
> > -{
> > -	struct mspin_node *prev;
> > -
> > -	/* Init node */
> > -	node->locked = 0;
> > -	node->next   = NULL;
> > -
> > -	prev = xchg(lock, node);
> > -	if (likely(prev == NULL)) {
> > -		/* Lock acquired */
> > -		node->locked = 1;
> > -		return;
> > -	}
> > -	ACCESS_ONCE(prev->next) = node;
> > -	smp_wmb();
> > -	/* Wait until the lock holder passes the lock down */
> > -	while (!ACCESS_ONCE(node->locked))
> > -		arch_mutex_cpu_relax();
> > -}
> > -
> > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
> > -{
> > -	struct mspin_node *next = ACCESS_ONCE(node->next);
> > -
> > -	if (likely(!next)) {
> > -		/*
> > -		 * Release the lock by setting it to NULL
> > -		 */
> > -		if (cmpxchg(lock, node, NULL) == node)
> > -			return;
> > -		/* Wait until the next pointer is set */
> > -		while (!(next = ACCESS_ONCE(node->next)))
> > -			arch_mutex_cpu_relax();
> > -	}
> > -	ACCESS_ONCE(next->locked) = 1;
> > -	smp_wmb();
> > -}
> > 
> >  /*
> >   * Mutex spinning code migrated from kernel/sched/core.c
> > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> > 
> >  	for (;;) {
> >  		struct task_struct *owner;
> > -		struct mspin_node  node;
> > +		struct mcs_spinlock  node;
> > 
> >  		if (use_ww_ctx && ww_ctx->acquired > 0) {
> >  			struct ww_mutex *ww;
> > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> >  		 * If there's an owner, wait for it to either
> >  		 * release the lock or go to sleep.
> >  		 */
> > -		mspin_lock(MLOCK(lock), &node);
> > +		mcs_spin_lock(&lock->mcs_lock, &node);
> >  		owner = ACCESS_ONCE(lock->owner);
> >  		if (owner && !mutex_spin_on_owner(lock, owner)) {
> > -			mspin_unlock(MLOCK(lock), &node);
> > +			mcs_spin_unlock(&lock->mcs_lock, &node);
> >  			goto slowpath;
> >  		}
> > 
> > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> >  			}
> > 
> >  			mutex_set_owner(lock);
> > -			mspin_unlock(MLOCK(lock), &node);
> > +			mcs_spin_unlock(&lock->mcs_lock, &node);
> >  			preempt_enable();
> >  			return 0;
> >  		}
> > -		mspin_unlock(MLOCK(lock), &node);
> > +		mcs_spin_unlock(&lock->mcs_lock, &node);
> > 
> >  		/*
> >  		 * When there's no owner, we might have preempted between the
> > -- 
> > 1.7.4.4
> > 
> > 
> > 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments
  2013-11-19 19:13     ` Paul E. McKenney
  (?)
@ 2013-11-19 19:42       ` Tim Chen
  -1 siblings, 0 replies; 70+ messages in thread
From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote:
> > From: Jason Low <jason.low2@hp.com>
> > 
> > Remove an unnecessary operation and mark the cmpxchg(lock, node, NULL) == node
> > check in mcs_spin_unlock() as likely(), since a race will not have occurred
> > most of the time.
> > 
> > Also add in more comments describing how the local node is used in MCS locks.
> > 
> > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> > Signed-off-by: Jason Low <jason.low2@hp.com>
> > ---
> >  include/linux/mcs_spinlock.h |   13 +++++++++++--
> >  1 files changed, 11 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> > index b5de3b0..96f14299 100644
> > --- a/include/linux/mcs_spinlock.h
> > +++ b/include/linux/mcs_spinlock.h
> > @@ -18,6 +18,12 @@ struct mcs_spinlock {
> >  };
> > 
> >  /*
> > + * In order to acquire the lock, the caller should declare a local node and
> > + * pass a reference of the node to this function in addition to the lock.
> > + * If the lock has already been acquired, then this will proceed to spin
> > + * on this node->locked until the previous lock holder sets the node->locked
> > + * in mcs_spin_unlock().
> > + *
> >   * We don't inline mcs_spin_lock() so that perf can correctly account for the
> >   * time spent in this lock function.
> >   */
> > @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  	prev = xchg(lock, node);
> >  	if (likely(prev == NULL)) {
> >  		/* Lock acquired */
> > -		node->locked = 1;
> 
> Agreed, no one looks at this field in this case, so no need to initialize
> it, unless for debug purposes.
> 
> >  		return;
> >  	}
> >  	ACCESS_ONCE(prev->next) = node;
> > @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  		arch_mutex_cpu_relax();
> >  }
> > 
> > +/*
> > + * Releases the lock. The caller should pass in the corresponding node that
> > + * was used to acquire the lock.
> > + */
> >  static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  {
> >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> >  		/*
> >  		 * Release the lock by setting it to NULL
> >  		 */
> > -		if (cmpxchg(lock, node, NULL) == node)
> > +		if (likely(cmpxchg(lock, node, NULL) == node))
> 
> Agreed here as well.  Takes a narrow race to hit this.
> 
> So, did your testing exercise this path?  If the answer is "yes", and
> if the issues that I called out in patch #1 are resolved:

I haven't instrumented the code to check the hit rate of this path, but
the slow path will probably only get hit in some cases under heavy
contention.
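
If one did want to measure it, a rough sketch of counting the slow-path
hits is shown below (purely illustrative: the per-CPU counter and the
function name are made up for this example, and the barriers are left as
in the code at this point in the series):

/* Illustrative instrumentation sketch only -- not part of the patch. */
static DEFINE_PER_CPU(unsigned long, mcs_unlock_slowpath);

static void mcs_spin_unlock_counted(struct mcs_spinlock **lock,
				    struct mcs_spinlock *node)
{
	struct mcs_spinlock *next = ACCESS_ONCE(node->next);

	if (likely(!next)) {
		/* Release the lock by setting it to NULL */
		if (likely(cmpxchg(lock, node, NULL) == node))
			return;
		/* Raced with an incoming waiter: count the slow path. */
		this_cpu_inc(mcs_unlock_slowpath);
		/* Wait until the next pointer is set */
		while (!(next = ACCESS_ONCE(node->next)))
			arch_mutex_cpu_relax();
	}
	ACCESS_ONCE(next->locked) = 1;
	smp_wmb();
}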


> 
> Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> >  			return;
> >  		/* Wait until the next pointer is set */
> >  		while (!(next = ACCESS_ONCE(node->next)))
> > -- 
> > 1.7.4.4
> > 
> > 
> > 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-19 19:21     ` Paul E. McKenney
  (?)
@ 2013-11-19 19:46       ` Tim Chen
  -1 siblings, 0 replies; 70+ messages in thread
From: Tim Chen @ 2013-11-19 19:46 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, 2013-11-19 at 11:21 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote:
> > From: Waiman Long <Waiman.Long@hp.com>
> > 
> > This patch corrects the way memory barriers are used in the MCS lock,
> > using the smp_load_acquire and smp_store_release functions.
> > It removes the barriers that are not needed.
> > 
> > It uses architecture-specific load-acquire and store-release
> > primitives for synchronization, if available. Generic implementations
> > are provided in case they are not defined, even though they may not
> > be optimal. These generic implementations could be removed later on
> > once changes are made in all the relevant header files.
> > 
> > Suggested-by: Michel Lespinasse <walken@google.com>
> > Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> > Signed-off-by: Jason Low <jason.low2@hp.com>
> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> 
> Please see comments below.
> 
> 							Thanx, Paul
> 
> > ---
> >  kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
> >  1 files changed, 41 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> > index b6f27f8..df5c167 100644
> > --- a/kernel/locking/mcs_spinlock.c
> > +++ b/kernel/locking/mcs_spinlock.c
> > @@ -23,6 +23,31 @@
> >  #endif
> > 
> >  /*
> > + * Fall back to use the regular atomic operations and memory barrier if
> > + * the acquire/release versions are not defined.
> > + */
> > +#ifndef	xchg_acquire
> > +# define xchg_acquire(p, v)		xchg(p, v)
> > +#endif
> > +
> > +#ifndef	smp_load_acquire
> > +# define smp_load_acquire(p)				\
> > +	({						\
> > +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> > +		smp_mb();				\
> > +		__v;					\
> > +	})
> > +#endif
> > +
> > +#ifndef smp_store_release
> > +# define smp_store_release(p, v)		\
> > +	do {					\
> > +		smp_mb();			\
> > +		ACCESS_ONCE(*(p)) = v;		\
> > +	} while (0)
> > +#endif
> > +
> > +/*
> >   * In order to acquire the lock, the caller should declare a local node and
> >   * pass a reference of the node to this function in addition to the lock.
> >   * If the lock has already been acquired, then this will proceed to spin
> > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  	node->locked = 0;
> >  	node->next   = NULL;
> > 
> > -	prev = xchg(lock, node);
> > +	/* xchg() provides a memory barrier */
> > +	prev = xchg_acquire(lock, node);
> 
> But if this is xchg_acquire() with only acquire semantics, it need not
> ensure that the initializations of node->locked and node->next above
> will happen before the "ACCESS_ONCE(prev->next) = node" below.  This
> therefore needs to remain xchg().  Or you need an smp_store_release()
> below instead of an ACCESS_ONCE() assignment.

Good point.  Will keep it as xchg.
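
In other words, the two self-consistent choices for the enqueue step are
roughly the following (a sketch only, relying on the xchg_acquire() and
smp_store_release() fallbacks defined in the patch above; the helper names
are illustrative):

/* Choice 1: keep the full-barrier xchg(); a plain store links the node. */
static void mcs_enqueue_full_barrier(struct mcs_spinlock **lock,
				     struct mcs_spinlock *node)
{
	struct mcs_spinlock *prev;

	node->locked = 0;
	node->next   = NULL;

	/* Full barrier orders the initialization before the link below. */
	prev = xchg(lock, node);
	if (prev)
		ACCESS_ONCE(prev->next) = node;
}

/* Choice 2: acquire-only exchange, so the link must be a release store. */
static void mcs_enqueue_acquire_release(struct mcs_spinlock **lock,
					struct mcs_spinlock *node)
{
	struct mcs_spinlock *prev;

	node->locked = 0;
	node->next   = NULL;

	prev = xchg_acquire(lock, node);
	if (prev)
		/* Release ensures the initialization cannot leak past it. */
		smp_store_release(&prev->next, node);
}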

> 
> As currently written, the poor CPU doing the unlock can be fatally
> disappointed by seeing pre-initialized values of ->locked and ->next.
> This could, among other things, result in a hang where the handoff
> happens before the initialization.
> 
> >  	if (likely(prev == NULL)) {
> >  		/* Lock acquired */
> >  		return;
> >  	}
> >  	ACCESS_ONCE(prev->next) = node;
> > -	smp_wmb();
> > -	/* Wait until the lock holder passes the lock down */
> > -	while (!ACCESS_ONCE(node->locked))
> > +	/*
> > +	 * Wait until the lock holder passes the lock down.
> > +	 * Using smp_load_acquire() provides a memory barrier that
> > +	 * ensures subsequent operations happen after the lock is acquired.
> > +	 */
> > +	while (!(smp_load_acquire(&node->locked)))
> >  		arch_mutex_cpu_relax();
> 
> OK, this smp_load_acquire() makes sense!
> 
> >  }
> >  EXPORT_SYMBOL_GPL(mcs_spin_lock);
> > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock);
> >   * Releases the lock. The caller should pass in the corresponding node that
> >   * was used to acquire the lock.
> >   */
> > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  {
> >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > 
> > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> >  		while (!(next = ACCESS_ONCE(node->next)))
> >  			arch_mutex_cpu_relax();
> >  	}
> > -	ACCESS_ONCE(next->locked) = 1;
> > -	smp_wmb();
> > +	/*
> > +	 * Pass lock to next waiter.
> > +	 * smp_store_release() provides a memory barrier to ensure
> > +	 * all operations in the critical section have been completed
> > +	 * before unlocking.
> > +	 */
> > +	smp_store_release(&next->locked , 1);
> 
> This smp_store_release() makes sense as well!
> 
> Could you please get rid of the extraneous space before the comma?

Will do.

> 
> >  }
> >  EXPORT_SYMBOL_GPL(mcs_spin_unlock);
> > -- 
> > 1.7.4.4
> > 
> > 
> 



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
@ 2013-11-19 19:46       ` Tim Chen
  0 siblings, 0 replies; 70+ messages in thread
From: Tim Chen @ 2013-11-19 19:46 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott

On Tue, 2013-11-19 at 11:21 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote:
> > From: Waiman Long <Waiman.Long@hp.com>
> > 
> > This patch corrects the way memory barriers are used in the MCS lock
> > with smp_load_acquire and smp_store_release fucnction.
> > It removes ones that are not needed.
> > 
> > It uses architecture specific load-acquire and store-release
> > primitives for synchronization, if available. Generic implementations
> > are provided in case they are not defined even though they may not
> > be optimal. These generic implementation could be removed later on
> > once changes are made in all the relevant header files.
> > 
> > Suggested-by: Michel Lespinasse <walken@google.com>
> > Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> > Signed-off-by: Jason Low <jason.low2@hp.com>
> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> 
> Please see comments below.
> 
> 							Thanx, Paul
> 
> > ---
> >  kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
> >  1 files changed, 41 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> > index b6f27f8..df5c167 100644
> > --- a/kernel/locking/mcs_spinlock.c
> > +++ b/kernel/locking/mcs_spinlock.c
> > @@ -23,6 +23,31 @@
> >  #endif
> > 
> >  /*
> > + * Fall back to use the regular atomic operations and memory barrier if
> > + * the acquire/release versions are not defined.
> > + */
> > +#ifndef	xchg_acquire
> > +# define xchg_acquire(p, v)		xchg(p, v)
> > +#endif
> > +
> > +#ifndef	smp_load_acquire
> > +# define smp_load_acquire(p)				\
> > +	({						\
> > +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> > +		smp_mb();				\
> > +		__v;					\
> > +	})
> > +#endif
> > +
> > +#ifndef smp_store_release
> > +# define smp_store_release(p, v)		\
> > +	do {					\
> > +		smp_mb();			\
> > +		ACCESS_ONCE(*(p)) = v;		\
> > +	} while (0)
> > +#endif
> > +
> > +/*
> >   * In order to acquire the lock, the caller should declare a local node and
> >   * pass a reference of the node to this function in addition to the lock.
> >   * If the lock has already been acquired, then this will proceed to spin
> > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  	node->locked = 0;
> >  	node->next   = NULL;
> > 
> > -	prev = xchg(lock, node);
> > +	/* xchg() provides a memory barrier */
> > +	prev = xchg_acquire(lock, node);
> 
> But if this is xchg_acquire() with only acquire semantics, it need not
> ensure that the initializations of node->locked and node->next above
> will happen before the "ACCESS_ONCE(prev->next) = node" below.  This
> therefore needs to remain xchg().  Or you need an smp_store_release()
> below instead of an ACCESS_ONCE() assignment.

Good point.  Will keep it as xchg.

> 
> As currently written, the poor CPU doing the unlock can be fatally
> disappointed by seeing pre-initialized values of ->locked and ->next.
> This could, among other things, result in a hang where the handoff
> happens before the initialization.
> 
> >  	if (likely(prev == NULL)) {
> >  		/* Lock acquired */
> >  		return;
> >  	}
> >  	ACCESS_ONCE(prev->next) = node;
> > -	smp_wmb();
> > -	/* Wait until the lock holder passes the lock down */
> > -	while (!ACCESS_ONCE(node->locked))
> > +	/*
> > +	 * Wait until the lock holder passes the lock down.
> > +	 * Using smp_load_acquire() provides a memory barrier that
> > +	 * ensures subsequent operations happen after the lock is acquired.
> > +	 */
> > +	while (!(smp_load_acquire(&node->locked)))
> >  		arch_mutex_cpu_relax();
> 
> OK, this smp_load_acquire() makes sense!
> 
> >  }
> >  EXPORT_SYMBOL_GPL(mcs_spin_lock);
> > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock);
> >   * Releases the lock. The caller should pass in the corresponding node that
> >   * was used to acquire the lock.
> >   */
> > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  {
> >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > 
> > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> >  		while (!(next = ACCESS_ONCE(node->next)))
> >  			arch_mutex_cpu_relax();
> >  	}
> > -	ACCESS_ONCE(next->locked) = 1;
> > -	smp_wmb();
> > +	/*
> > +	 * Pass lock to next waiter.
> > +	 * smp_store_release() provides a memory barrier to ensure
> > +	 * all operations in the critical section has been completed
> > +	 * before unlocking.
> > +	 */
> > +	smp_store_release(&next->locked , 1);
> 
> This smp_store_release() makes sense as well!
> 
> Could you please get rid of the extraneous space before the comma?

Will do.
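
And for the release side, a minimal sketch of the hand-off using
smp_store_release() with the comma spacing fixed (illustrative only, not the
exact patch text):

	/* Sketch only: mcs_spin_unlock() with the release store discussed above. */
	static void mcs_spin_unlock_sketch(struct mcs_spinlock **lock,
					   struct mcs_spinlock *node)
	{
		struct mcs_spinlock *next = ACCESS_ONCE(node->next);

		if (likely(!next)) {
			/* no successor visible: try to release the lock outright */
			if (cmpxchg(lock, node, NULL) == node)
				return;
			/* a waiter is queuing; wait for it to set our next pointer */
			while (!(next = ACCESS_ONCE(node->next)))
				arch_mutex_cpu_relax();
		}
		/* release: the whole critical section is visible before the handoff */
		smp_store_release(&next->locked, 1);
	}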

> 
> >  }
> >  EXPORT_SYMBOL_GPL(mcs_spin_unlock);
> > -- 
> > 1.7.4.4
> > 
> > 
> 

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file
  2013-11-19 19:42       ` Tim Chen
  (?)
@ 2013-11-19 19:54         ` Paul E. McKenney
  -1 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:54 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, Nov 19, 2013 at 11:42:32AM -0800, Tim Chen wrote:
> On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote:
> > On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote:
> > > We will need the MCS lock code for doing optimistic spinning for rwsem
> > > and queue rwlock.  Extracting the MCS code from mutex.c and put into
> > > its own file allow us to reuse this code easily.
> > > 
> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> > > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
> > 
> > Please see comments below.
> > 
> 
> Thanks for reviewing the code.
> 
> > 							Thanx, Paul
> > 
> > > ---
> > >  include/linux/mcs_spinlock.h |   64 ++++++++++++++++++++++++++++++++++++++++++
> > >  include/linux/mutex.h        |    5 ++-
> > >  kernel/locking/mutex.c       |   60 ++++----------------------------------
> > >  3 files changed, 74 insertions(+), 55 deletions(-)
> > >  create mode 100644 include/linux/mcs_spinlock.h
> > > 
> > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> > > new file mode 100644
> > > index 0000000..b5de3b0
> > > --- /dev/null
> > > +++ b/include/linux/mcs_spinlock.h
> > > @@ -0,0 +1,64 @@
> > > +/*
> > > + * MCS lock defines
> > > + *
> > > + * This file contains the main data structure and API definitions of MCS lock.
> > > + *
> > > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
> > > + * with the desirable properties of being fair, and with each cpu trying
> > > + * to acquire the lock spinning on a local variable.
> > > + * It avoids expensive cache bouncings that common test-and-set spin-lock
> > > + * implementations incur.
> > > + */
> > > +#ifndef __LINUX_MCS_SPINLOCK_H
> > > +#define __LINUX_MCS_SPINLOCK_H
> > > +
> > > +struct mcs_spinlock {
> > > +	struct mcs_spinlock *next;
> > > +	int locked; /* 1 if lock acquired */
> > > +};
> > > +
> > > +/*
> > > + * We don't inline mcs_spin_lock() so that perf can correctly account for the
> > > + * time spent in this lock function.
> > > + */
> > > +static noinline
> > > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > > +{
> > > +	struct mcs_spinlock *prev;
> > > +
> > > +	/* Init node */
> > > +	node->locked = 0;
> > > +	node->next   = NULL;
> > > +
> > > +	prev = xchg(lock, node);
> > 
> > OK, the full memory barriers implied by xchg() ensure that *node will be
> > initialized before the "ACCESS_ONCE(prev->next) = node" below puts the
> > node into the list.  This rules out the misordering scenario that Tim
> > Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK>
> > on September 27th.
> > 
> > Assuming of course a corresponding barrier on the lock handoff side.
> > 
> > > +	if (likely(prev == NULL)) {
> > > +		/* Lock acquired */
> > > +		node->locked = 1;
> > > +		return;
> > > +	}
> > > +	ACCESS_ONCE(prev->next) = node;
> > > +	smp_wmb();
> > 
> > I don't see what the above memory barrier does.  Here are some things
> > that it cannot be doing:
> > 
> > o	Ordering the insertion into the list above with the polling
> > 	below.  First, smp_wmb() does not order prior writes against
> > 	later reads, and second misordering is harmless.  If we start
> > 	polling before the insertion is complete, all that happens
> > 	is that the first few polls have no chance of seeing a lock
> > 	grant.
> > 
> > o	Ordering the polling against the initialization -- the above
> > 	xchg() is already doing that for us.
> > 
> > So what is its purpose?
> 
> Agree that the smp_wmb is not needed.  It is in the existing mcs code
> residing in mutex.c; we're only re-factoring the code here and haven't
> corrected the memory barrier.

Ah, so I should have been more aggressive about reviewing some time back,
then...  ;-)

							Thanx, Paul

> The particular smp_wmb() is removed in Patch 4/4 that corrects the
> memory barriers.
> 
> > 
> > > +	/* Wait until the lock holder passes the lock down */
> > > +	while (!ACCESS_ONCE(node->locked))
> > > +		arch_mutex_cpu_relax();
> > 
> > On the other hand, I don't see how we get away without a barrier here.
> > As written, what prevents the caller's load from ->owner from being
> > reordered with the above load from ->locked?  (Perhaps you can argue
> > that such reordering is only a performance problem, but if so we need
> > that argument recorded in comments.)
> > 
> > Of course, if anyone ever tries to use mcs_spin_lock() as a full lock,
> > they will need a memory barrier here to prevent the critical section
> > from leaking out.
> 
> Agree too.  The appropriate memory barrier is added in Patch 4/4.
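
For concreteness, the reordering concern above shows up at the call site
roughly as follows (a sketch mirroring the __mutex_lock_common() hunk quoted
further down; the comment is illustrative, not part of the patch):

	mcs_spin_lock(&lock->mcs_lock, &node);
	/*
	 * Without acquire semantics on the ->locked spin, this load of
	 * lock->owner could be satisfied before the lock handoff is
	 * observed; the smp_load_acquire() added in Patch 4/4 forbids that.
	 */
	owner = ACCESS_ONCE(lock->owner);
	if (owner && !mutex_spin_on_owner(lock, owner)) {
		mcs_spin_unlock(&lock->mcs_lock, &node);
		goto slowpath;
	}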
> 
> > 
> > > +}
> > > +
> > > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > > +{
> > > +	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > > +
> > > +	if (likely(!next)) {
> > > +		/*
> > > +		 * Release the lock by setting it to NULL
> > > +		 */
> > > +		if (cmpxchg(lock, node, NULL) == node)
> > > +			return;
> > > +		/* Wait until the next pointer is set */
> > > +		while (!(next = ACCESS_ONCE(node->next)))
> > > +			arch_mutex_cpu_relax();
> > > +	}
> > 
> > We need a memory barrier somewhere before here in this function,
> > otherwise the critical section can leak out.  I do not believe that
> > we can rely on the prohibition against speculative stores that Peter
> > Zijlstra and I have been discussing because that does not provide the
> > transitivity required by locking primitives.  I believe that we -could-
> > make the access below be an smp_store_release(), though.
> > 
> > Placing the barrier here (or at least not preceding the initial
> > fetch from node->next) has the advantage of allowing it to pair with
> > the xchg() in mcs_spin_lock(), though given the dependency only an
> > smp_read_barrier_depends() is required for that purpose.
> > 
> > > +	ACCESS_ONCE(next->locked) = 1;
> > > +	smp_wmb();
> > 
> > I don't see what this barrier does for us.  It is ordering the unlock
> > store with what, exactly?
> > 
> > If it really is doing something, we need a big fat comment stating what
> > that is, and checkpatch.pl will be happy to inform you.  ;-)
> > 
> > > +}
> > > +
> > > +#endif /* __LINUX_MCS_SPINLOCK_H */
> > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> > > index bab49da..32a32e6 100644
> > > --- a/include/linux/mutex.h
> > > +++ b/include/linux/mutex.h
> > > @@ -46,6 +46,7 @@
> > >   * - detects multi-task circular deadlocks and prints out all affected
> > >   *   locks and tasks (and only those tasks)
> > >   */
> > > +struct mcs_spinlock;
> > >  struct mutex {
> > >  	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
> > >  	atomic_t		count;
> > > @@ -55,7 +56,7 @@ struct mutex {
> > >  	struct task_struct	*owner;
> > >  #endif
> > >  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > > -	void			*spin_mlock;	/* Spinner MCS lock */
> > > +	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
> > >  #endif
> > >  #ifdef CONFIG_DEBUG_MUTEXES
> > >  	const char 		*name;
> > > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
> > >  # define arch_mutex_cpu_relax() cpu_relax()
> > >  #endif
> > > 
> > > -#endif
> > > +#endif /* __LINUX_MUTEX_H */
> > > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> > > index d24105b..e08b183 100644
> > > --- a/kernel/locking/mutex.c
> > > +++ b/kernel/locking/mutex.c
> > > @@ -25,6 +25,7 @@
> > >  #include <linux/spinlock.h>
> > >  #include <linux/interrupt.h>
> > >  #include <linux/debug_locks.h>
> > > +#include <linux/mcs_spinlock.h>
> > > 
> > >  /*
> > >   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> > > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
> > >  	INIT_LIST_HEAD(&lock->wait_list);
> > >  	mutex_clear_owner(lock);
> > >  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > > -	lock->spin_mlock = NULL;
> > > +	lock->mcs_lock = NULL;
> > >  #endif
> > > 
> > >  	debug_mutex_init(lock, name, key);
> > > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
> > >   * more or less simultaneously, the spinners need to acquire a MCS lock
> > >   * first before spinning on the owner field.
> > >   *
> > > - * We don't inline mspin_lock() so that perf can correctly account for the
> > > - * time spent in this lock function.
> > >   */
> > > -struct mspin_node {
> > > -	struct mspin_node *next ;
> > > -	int		  locked;	/* 1 if lock acquired */
> > > -};
> > > -#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
> > > -
> > > -static noinline
> > > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
> > > -{
> > > -	struct mspin_node *prev;
> > > -
> > > -	/* Init node */
> > > -	node->locked = 0;
> > > -	node->next   = NULL;
> > > -
> > > -	prev = xchg(lock, node);
> > > -	if (likely(prev == NULL)) {
> > > -		/* Lock acquired */
> > > -		node->locked = 1;
> > > -		return;
> > > -	}
> > > -	ACCESS_ONCE(prev->next) = node;
> > > -	smp_wmb();
> > > -	/* Wait until the lock holder passes the lock down */
> > > -	while (!ACCESS_ONCE(node->locked))
> > > -		arch_mutex_cpu_relax();
> > > -}
> > > -
> > > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
> > > -{
> > > -	struct mspin_node *next = ACCESS_ONCE(node->next);
> > > -
> > > -	if (likely(!next)) {
> > > -		/*
> > > -		 * Release the lock by setting it to NULL
> > > -		 */
> > > -		if (cmpxchg(lock, node, NULL) == node)
> > > -			return;
> > > -		/* Wait until the next pointer is set */
> > > -		while (!(next = ACCESS_ONCE(node->next)))
> > > -			arch_mutex_cpu_relax();
> > > -	}
> > > -	ACCESS_ONCE(next->locked) = 1;
> > > -	smp_wmb();
> > > -}
> > > 
> > >  /*
> > >   * Mutex spinning code migrated from kernel/sched/core.c
> > > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> > > 
> > >  	for (;;) {
> > >  		struct task_struct *owner;
> > > -		struct mspin_node  node;
> > > +		struct mcs_spinlock  node;
> > > 
> > >  		if (use_ww_ctx && ww_ctx->acquired > 0) {
> > >  			struct ww_mutex *ww;
> > > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> > >  		 * If there's an owner, wait for it to either
> > >  		 * release the lock or go to sleep.
> > >  		 */
> > > -		mspin_lock(MLOCK(lock), &node);
> > > +		mcs_spin_lock(&lock->mcs_lock, &node);
> > >  		owner = ACCESS_ONCE(lock->owner);
> > >  		if (owner && !mutex_spin_on_owner(lock, owner)) {
> > > -			mspin_unlock(MLOCK(lock), &node);
> > > +			mcs_spin_unlock(&lock->mcs_lock, &node);
> > >  			goto slowpath;
> > >  		}
> > > 
> > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> > >  			}
> > > 
> > >  			mutex_set_owner(lock);
> > > -			mspin_unlock(MLOCK(lock), &node);
> > > +			mcs_spin_unlock(&lock->mcs_lock, &node);
> > >  			preempt_enable();
> > >  			return 0;
> > >  		}
> > > -		mspin_unlock(MLOCK(lock), &node);
> > > +		mcs_spin_unlock(&lock->mcs_lock, &node);
> > > 
> > >  		/*
> > >  		 * When there's no owner, we might have preempted between the
> > > -- 
> > > 1.7.4.4
> > > 
> > > 
> > > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> 
> 


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-19 19:32           ` Paul E. McKenney
@ 2013-11-19 21:45             ` Tim Chen
  -1 siblings, 0 replies; 70+ messages in thread
From: Tim Chen @ 2013-11-19 21:45 UTC (permalink / raw)
  To: paulmck
  Cc: Waiman Long, Peter Zijlstra, Will Deacon, Ingo Molnar,
	Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm,
	linux-arch, Linus Torvalds, Andrea Arcangeli, Alex Shi,
	Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox,
	Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra K T,
	George Spelvin, H. Peter Anvin

On Tue, 2013-11-19 at 11:32 -0800, Paul E. McKenney wrote:
> On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote:
> > On 11/11/2013 04:17 PM, Tim Chen wrote:
> > >>You could then augment that with [cmp]xchg_{acquire,release} as
> > >>appropriate.
> > >>
> > >>>+/*
> > >>>   * In order to acquire the lock, the caller should declare a local node and
> > >>>   * pass a reference of the node to this function in addition to the lock.
> > >>>   * If the lock has already been acquired, then this will proceed to spin
> > >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > >>>  	node->locked = 0;
> > >>>  	node->next   = NULL;
> > >>>
> > >>>-	prev = xchg(lock, node);
> > >>>+	/* xchg() provides a memory barrier */
> > >>>+	prev = xchg_acquire(lock, node);
> > >>>  	if (likely(prev == NULL)) {
> > >>>  		/* Lock acquired */
> > >>>  		return;
> > >>>  	}
> > >>>  	ACCESS_ONCE(prev->next) = node;
> > >>>-	smp_wmb();
> > >>>-	/* Wait until the lock holder passes the lock down */
> > >>>-	while (!ACCESS_ONCE(node->locked))
> > >>>+	/*
> > >>>+	 * Wait until the lock holder passes the lock down.
> > >>>+	 * Using smp_load_acquire() provides a memory barrier that
> > >>>+	 * ensures subsequent operations happen after the lock is acquired.
> > >>>+	 */
> > >>>+	while (!(smp_load_acquire(&node->locked)))
> > >>>  		arch_mutex_cpu_relax();
> > >An alternate implementation is
> > >	while (!ACCESS_ONCE(node->locked))
> > >		arch_mutex_cpu_relax();
> > >	smp_load_acquire(&node->locked);
> > >
> > >Leaving the smp_load_acquire at the end to provide appropriate barrier.
> > >Will that be acceptable?
> > >
> > >Tim
> > 
> > I second Tim's opinion. It would be helpful to have a
> > smp_mb_load_acquire() function that provides a memory barrier with
> > load-acquire semantics. I don't think we need one for store-release
> > as that will not be in a loop.
> 
> Hmmm...  I guess the ACCESS_ONCE() in the smp_load_acquire() should
> prevent it from being optimized away.  But yes, you then end up with
> an extra load on the critical lock hand-off path.  And something
> like an smp_mb_acquire() could then be useful, although I believe
> that on all current hardware smp_mb_acquire() emits the same code
> as would an smp_mb_release():
> 
> o	barrier() on TSO systems such as x86 and s390.
> 
> o	lwsync instruction on powerpc.  (Really old systems would
> 	want a different instruction for smp_mb_acquire(), but let's
> 	not optimize for really old systems.)
> 
> o	dmb instruction on ARM.
> 
> o	mf instruction on ia64.
> 
> So how about an smp_mb_acquire_release() to cover both use cases?

I guess we haven't addressed Will's preference to use wfe for ARM
instead of doing a spin loop.  I'd like some suggestions on how to
proceed here.  Should we do arch_mcs_lock and arch_mcs_unlock, which
defaults to the existing mcs_lock and mcs_unlock code, but allow
architecture specific implementation?
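
For concreteness, one possible shape for such an arch hook (the names here are
purely illustrative, not an actual proposal in this thread) could be a generic
spin that architectures may override, e.g. with a wfe-based wait on ARM:

	/* Generic fallback: spin until the lock holder hands the lock down. */
	#ifndef arch_mcs_spin_wait
	#define arch_mcs_spin_wait(locked_ptr)				\
	do {								\
		while (!smp_load_acquire(locked_ptr))			\
			arch_mutex_cpu_relax();				\
	} while (0)
	#endif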

Tim

> This could be used to further optimize circular buffers, for example.
> 
> 							Thanx, Paul
> 
> > Peter, what do you think about adding that to your patch?
> > 
> > -Longman
> > 
> 



^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments
  2013-11-19 19:13     ` Paul E. McKenney
  (?)
@ 2013-11-19 22:57       ` Tim Chen
  -1 siblings, 0 replies; 70+ messages in thread
From: Tim Chen @ 2013-11-19 22:57 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote:
> > 
> > +/*
> > + * Releases the lock. The caller should pass in the corresponding node that
> > + * was used to acquire the lock.
> > + */
> >  static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  {
> >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> >  		/*
> >  		 * Release the lock by setting it to NULL
> >  		 */
> > -		if (cmpxchg(lock, node, NULL) == node)
> > +		if (likely(cmpxchg(lock, node, NULL) == node))
> 
> Agreed here as well.  Takes a narrow race to hit this.
> 
> So, did your testing exercise this path?  If the answer is "yes", 


Paul,

I did some instrumentation and confirmed that the path in question has 
been exercised.  So this patch should be okay.
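
For reference, the window being exercised is the one between a new waiter's
xchg() into the lock and its store to prev->next; annotated on the quoted
hunk (comments are illustrative only):

	if (likely(!next)) {
		/*
		 * cmpxchg() fails here only when another CPU has already
		 * xchg()'d itself into *lock but has not yet stored into
		 * our ->next pointer, which is the narrow race noted above.
		 */
		if (likely(cmpxchg(lock, node, NULL) == node))
			return;
		/* wait for that racing waiter to finish linking itself */
		while (!(next = ACCESS_ONCE(node->next)))
			arch_mutex_cpu_relax();
	}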

Tim

> and if the issues that I called out in patch #1 are resolved:
> 
> Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> >  			return;
> >  		/* Wait until the next pointer is set */
> >  		while (!(next = ACCESS_ONCE(node->next)))
> >


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments
  2013-11-19 22:57       ` Tim Chen
@ 2013-11-19 23:05         ` Paul E. McKenney
  0 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 23:05 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, Nov 19, 2013 at 02:57:41PM -0800, Tim Chen wrote:
> On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote:
> > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote:
> > > 
> > > +/*
> > > + * Releases the lock. The caller should pass in the corresponding node that
> > > + * was used to acquire the lock.
> > > + */
> > >  static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > >  {
> > >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> > >  		/*
> > >  		 * Release the lock by setting it to NULL
> > >  		 */
> > > -		if (cmpxchg(lock, node, NULL) == node)
> > > +		if (likely(cmpxchg(lock, node, NULL) == node))
> > 
> > Agreed here as well.  Takes a narrow race to hit this.
> > 
> > So, did your testing exercise this path?  If the answer is "yes", 
> 
> 
> Paul,
> 
> I did some instrumentation and confirmed that the path in question has 
> been exercised.  So this patch should be okay.

Very good!

							Thanx, Paul


^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-19 21:45             ` Tim Chen
@ 2013-11-19 23:30               ` Paul E. McKenney
  0 siblings, 0 replies; 70+ messages in thread
From: Paul E. McKenney @ 2013-11-19 23:30 UTC (permalink / raw)
  To: Tim Chen
  Cc: Waiman Long, Peter Zijlstra, Will Deacon, Ingo Molnar,
	Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm,
	linux-arch, Linus Torvalds, Andrea Arcangeli, Alex Shi,
	Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox,
	Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra

On Tue, Nov 19, 2013 at 01:45:03PM -0800, Tim Chen wrote:
> On Tue, 2013-11-19 at 11:32 -0800, Paul E. McKenney wrote:
> > On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote:
> > > On 11/11/2013 04:17 PM, Tim Chen wrote:
> > > >>You could then augment that with [cmp]xchg_{acquire,release} as
> > > >>appropriate.
> > > >>
> > > >>>+/*
> > > >>>   * In order to acquire the lock, the caller should declare a local node and
> > > >>>   * pass a reference of the node to this function in addition to the lock.
> > > >>>   * If the lock has already been acquired, then this will proceed to spin
> > > >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > > >>>  	node->locked = 0;
> > > >>>  	node->next   = NULL;
> > > >>>
> > > >>>-	prev = xchg(lock, node);
> > > >>>+	/* xchg() provides a memory barrier */
> > > >>>+	prev = xchg_acquire(lock, node);
> > > >>>  	if (likely(prev == NULL)) {
> > > >>>  		/* Lock acquired */
> > > >>>  		return;
> > > >>>  	}
> > > >>>  	ACCESS_ONCE(prev->next) = node;
> > > >>>-	smp_wmb();
> > > >>>-	/* Wait until the lock holder passes the lock down */
> > > >>>-	while (!ACCESS_ONCE(node->locked))
> > > >>>+	/*
> > > >>>+	 * Wait until the lock holder passes the lock down.
> > > >>>+	 * Using smp_load_acquire() provides a memory barrier that
> > > >>>+	 * ensures subsequent operations happen after the lock is acquired.
> > > >>>+	 */
> > > >>>+	while (!(smp_load_acquire(&node->locked)))
> > > >>>  		arch_mutex_cpu_relax();
> > > >An alternate implementation is
> > > >	while (!ACCESS_ONCE(node->locked))
> > > >		arch_mutex_cpu_relax();
> > > >	smp_load_acquire(&node->locked);
> > > >
> > > >Leaving the smp_load_acquire at the end to provide appropriate barrier.
> > > >Will that be acceptable?
> > > >
> > > >Tim
> > > 
> > > I second Tim's opinion. It would be helpful to have an
> > > smp_mb_load_acquire() function that provides a memory barrier with
> > > load-acquire semantics. I don't think we need one for store-release,
> > > as that will not be in a loop.
> > 
> > Hmmm...  I guess the ACCESS_ONCE() in the smp_load_acquire() should
> > prevent it from being optimized away.  But yes, you then end up with
> > an extra load on the critical lock hand-off path.  And something
> > like an smp_mb_acquire() could then be useful, although I believe
> > that on all current hardware smp_mb_acquire() emits the same code
> > as would an smp_mb_release():
> > 
> > o	barrier() on TSO systems such as x86 and s390.
> > 
> > o	lwsync instruction on powerpc.  (Really old systems would
> > 	want a different instruction for smp_mb_acquire(), but let's
> > 	not optimize for really old systems.)
> > 
> > o	dmb instruction on ARM.
> > 
> > o	mf instruction on ia64.
> > 
> > So how about an smp_mb_acquire_release() to cover both use cases?
> 
> I guess we haven't addressed Will's preference to use wfe for ARM
> instead of doing a spin loop.  I'd like some suggestions on how to
> proceed here.  Should we do arch_mcs_lock and arch_mcs_unlock, which
> default to the existing mcs_lock and mcs_unlock code but allow an
> architecture-specific implementation?

It would be nice to confine the architecture-specific pieces if at all
possible.  For example, can the architecture-specific piece be confined
to the actual high-contention lock handoff?

							Thanx, Paul
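
One way to read the combination of Tim's alternate loop and the idea of
confining the architecture-specific piece to the contended hand-off is
to hide only the wait behind an overridable helper, leaving the queueing
logic generic.  The sketch below is an illustration under that
assumption; the arch_mcs_spin_wait() name and its #ifndef default are
hypothetical and not an API defined by this series.

	/*
	 * Default contended wait: spin on a plain load, then perform one
	 * acquire load once the lock is observed, so the critical section
	 * cannot be reordered before the hand-off.
	 */
	#ifndef arch_mcs_spin_wait
	#define arch_mcs_spin_wait(l)					\
	do {								\
		while (!ACCESS_ONCE(*(l)))				\
			arch_mutex_cpu_relax();				\
		smp_load_acquire(l);					\
	} while (0)
	#endif

	static noinline
	void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
	{
		struct mcs_spinlock *prev;

		node->locked = 0;
		node->next   = NULL;

		prev = xchg(lock, node);	/* full barrier on the fast path */
		if (likely(prev == NULL))
			return;			/* lock acquired uncontended */

		ACCESS_ONCE(prev->next) = node;
		/* Wait until the lock holder passes the lock down. */
		arch_mcs_spin_wait(&node->locked);
	}

An architecture that prefers wfe, as Will suggested for ARM, could then
supply its own arch_mcs_spin_wait() without touching the generic
queueing code, keeping the architecture-specific part confined to the
high-contention hand-off.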

^ permalink raw reply	[flat|nested] 70+ messages in thread

end of thread, other threads:[~2013-11-19 23:30 UTC | newest]

Thread overview: 70+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1383935697.git.tim.c.chen@linux.intel.com>
2013-11-08 19:51 ` [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file Tim Chen
2013-11-08 19:51   ` Tim Chen
2013-11-08 19:51   ` Tim Chen
2013-11-19 19:10   ` Paul E. McKenney
2013-11-19 19:10     ` Paul E. McKenney
2013-11-19 19:10     ` Paul E. McKenney
2013-11-19 19:42     ` Tim Chen
2013-11-19 19:42       ` Tim Chen
2013-11-19 19:42       ` Tim Chen
2013-11-19 19:54       ` Paul E. McKenney
2013-11-19 19:54         ` Paul E. McKenney
2013-11-19 19:54         ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 2/4] MCS Lock: optimizations and extra comments Tim Chen
2013-11-08 19:52   ` Tim Chen
2013-11-08 19:52   ` Tim Chen
2013-11-19 19:13   ` Paul E. McKenney
2013-11-19 19:13     ` Paul E. McKenney
2013-11-19 19:13     ` Paul E. McKenney
2013-11-19 19:42     ` Tim Chen
2013-11-19 19:42       ` Tim Chen
2013-11-19 19:42       ` Tim Chen
2013-11-19 22:57     ` Tim Chen
2013-11-19 22:57       ` Tim Chen
2013-11-19 22:57       ` Tim Chen
2013-11-19 23:05       ` Paul E. McKenney
2013-11-19 23:05         ` Paul E. McKenney
2013-11-19 23:05         ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file Tim Chen
2013-11-08 19:52   ` Tim Chen
2013-11-08 19:52   ` Tim Chen
2013-11-19 19:15   ` Paul E. McKenney
2013-11-19 19:15     ` Paul E. McKenney
2013-11-19 19:15     ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 4/4] MCS Lock: Barrier corrections Tim Chen
2013-11-08 19:52   ` Tim Chen
2013-11-08 19:52   ` Tim Chen
2013-11-11 18:10   ` Will Deacon
2013-11-11 18:10     ` Will Deacon
2013-11-11 18:20     ` Peter Zijlstra
2013-11-11 18:20       ` Peter Zijlstra
2013-11-19 19:23       ` Paul E. McKenney
2013-11-19 19:23         ` Paul E. McKenney
2013-11-11 21:17     ` Tim Chen
2013-11-11 21:17       ` Tim Chen
2013-11-12  1:57       ` Waiman Long
2013-11-12  1:57         ` Waiman Long
2013-11-19 19:32         ` Paul E. McKenney
2013-11-19 19:32           ` Paul E. McKenney
2013-11-19 21:45           ` Tim Chen
2013-11-19 21:45             ` Tim Chen
2013-11-19 23:30             ` Paul E. McKenney
2013-11-19 23:30               ` Paul E. McKenney
2013-11-12  2:09       ` Waiman Long
2013-11-12  2:09         ` Waiman Long
2013-11-12 14:54       ` Waiman Long
2013-11-12 14:54         ` Waiman Long
2013-11-12 16:08       ` Will Deacon
2013-11-12 16:08         ` Will Deacon
2013-11-12 17:16         ` George Spelvin
2013-11-12 17:16           ` George Spelvin
2013-11-13 17:37           ` Will Deacon
2013-11-13 17:37             ` Will Deacon
2013-11-19 19:26     ` Paul E. McKenney
2013-11-19 19:26       ` Paul E. McKenney
2013-11-19 19:21   ` Paul E. McKenney
2013-11-19 19:21     ` Paul E. McKenney
2013-11-19 19:21     ` Paul E. McKenney
2013-11-19 19:46     ` Tim Chen
2013-11-19 19:46       ` Tim Chen
2013-11-19 19:46       ` Tim Chen
