linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Manfred Spraul <manfred@colorfullife.com>
To: Stephen Rothwell <sfr@canb.auug.org.au>,
	Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@elte.hu>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	linux-next@vger.kernel.org, 1vier1@web.de,
	Davidlohr Bueso <dave@stgolabs.net>,
	felixh@informatik.uni-bremen.de,
	Manfred Spraul <manfred@colorfullife.com>
Subject: [PATCH 2/2] ipc/sem: sem_lock with hysteresis
Date: Sat, 18 Jun 2016 22:02:22 +0200	[thread overview]
Message-ID: <1466280142-19741-2-git-send-email-manfred@colorfullife.com> (raw)
In-Reply-To: <1466280142-19741-1-git-send-email-manfred@colorfullife.com>

sysv sem has two lock modes: One with per-semaphore locks, one lock mode
with a single big lock for the whole array.
When switching from the per-semaphore locks to the big lock, all
per-semaphore locks must be scanned for ongoing operations.

The patch adds a hysteresis for switching from the big lock to the per
semaphore locks. This reduces how often the per-semaphore locks must
be scanned.

Passed stress testing with sem-scalebench.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>

---
 include/linux/sem.h |  2 +-
 ipc/sem.c           | 91 ++++++++++++++++++++++++++++-------------------------
 2 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index d0efd6e..6fb3227 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -21,7 +21,7 @@ struct sem_array {
 	struct list_head	list_id;	/* undo requests on this array */
 	int			sem_nsems;	/* no. of semaphores in array */
 	int			complex_count;	/* pending complex operations */
-	bool			complex_mode;	/* no parallel simple ops */
+	int			complex_mode;	/* >0: no parallel simple ops */
 };
 
 #ifdef CONFIG_SYSVIPC
diff --git a/ipc/sem.c b/ipc/sem.c
index 11d9e60..1f43fb8 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -161,6 +161,13 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #define SEMOPM_FAST	64  /* ~ 372 bytes on stack */
 
 /*
+ * Switching from the mode suitable for simple ops
+ * to the mode for complex ops is costly. Therefore:
+ * use some hysteresis
+ */
+#define COMPLEX_MODE_ENTER 10
+
+/*
  * Locking:
  * a) global sem_lock() for read/write
  *	sem_undo.id_next,
@@ -279,17 +286,25 @@ static void sem_rcu_free(struct rcu_head *head)
 /*
  * Enter the mode suitable for non-simple operations:
  * Caller must own sem_perm.lock.
+ * Note:
+ * There is no leave complex mode function. Leaving
+ * happens in sem_lock, with some hysteresis.
  */
 static void complexmode_enter(struct sem_array *sma)
 {
 	int i;
 	struct sem *sem;
 
-	if (sma->complex_mode)  {
-		/* We are already in complex_mode. Nothing to do */
+	if (sma->complex_mode > 0)  {
+		/*
+		 * We are already in complex_mode.
+		 * Nothing to do, just increase
+		 * counter until we return to simple mode
+		 */
+		WRITE_ONCE(sma->complex_mode, COMPLEX_MODE_ENTER);
 		return;
 	}
-	WRITE_ONCE(sma->complex_mode, true);
+	WRITE_ONCE(sma->complex_mode, COMPLEX_MODE_ENTER);
 
 	/* We need a full barrier:
 	 * The write to complex_mode must be visible
@@ -305,29 +320,6 @@ static void complexmode_enter(struct sem_array *sma)
 }
 
 /*
- * Try to leave the mode that disallows simple operations:
- * Caller must own sem_perm.lock.
- */
-static void complexmode_tryleave(struct sem_array *sma)
-{
-	if (sma->complex_count)  {
-		/* Complex ops are sleeping.
-		 * We must stay in complex mode
-		 */
-		return;
-	}
-	/*
-	 * Immediately after setting complex_mode to false,
-	 * a simple op can start. Thus: all memory writes
-	 * performed by the current operation must be visible
-	 * before we set complex_mode to false.
-	 */
-	smp_wmb();
-
-	WRITE_ONCE(sma->complex_mode, false);
-}
-
-/*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
@@ -383,27 +375,42 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	ipc_lock_object(&sma->sem_perm);
 
 	if (sma->complex_count == 0) {
-		/* False alarm:
-		 * There is no complex operation, thus we can switch
-		 * back to the fast path.
-		 */
-		spin_lock(&sem->lock);
-		ipc_unlock_object(&sma->sem_perm);
-		return sops->sem_num;
-	} else {
-		/* Not a false alarm, thus complete the sequence for a
-		 * full lock.
+		/*
+		 * Check if fast path is possible:
+		 * There is no complex operation, check hysteresis
+		 * If 0, switch back to the fast path.
 		 */
-		complexmode_enter(sma);
-		return -1;
+		if (sma->complex_mode > 0) {
+			/* Note:
+			 * Immediately after setting complex_mode to 0,
+			 * a simple op could start.
+			 * The data it would access was written by the
+			 * previous owner of sem->sem_perm.lock, i.e
+			 * a release and an acquire memory barrier ago.
+			 * No need for another barrier.
+			 */
+			WRITE_ONCE(sma->complex_mode, sma->complex_mode-1);
+		}
+		if (sma->complex_mode == 0) {
+			spin_lock(&sem->lock);
+			ipc_unlock_object(&sma->sem_perm);
+			return sops->sem_num;
+		}
 	}
+	/*
+	 * Not a false alarm, full lock is required.
+	 * Since we are already in complex_mode (either because of waiting
+	 * complex ops or due to hysteresis), there is not need for a
+	 * complexmode_enter().
+	 */
+	WARN_ON(sma->complex_mode == 0);
+	return -1;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
 {
 	if (locknum == -1) {
 		unmerge_queues(sma);
-		complexmode_tryleave(sma);
 		ipc_unlock_object(&sma->sem_perm);
 	} else {
 		struct sem *sem = sma->sem_base + locknum;
@@ -555,7 +562,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	}
 
 	sma->complex_count = 0;
-	sma->complex_mode = true; /* dropped by sem_unlock below */
+	WRITE_ONCE(sma->complex_mode, COMPLEX_MODE_ENTER);
 	INIT_LIST_HEAD(&sma->pending_alter);
 	INIT_LIST_HEAD(&sma->pending_const);
 	INIT_LIST_HEAD(&sma->list_id);
@@ -2212,7 +2219,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	 * The proc interface isn't aware of sem_lock(), it calls
 	 * ipc_lock_object() directly (in sysvipc_find_ipc).
 	 * In order to stay compatible with sem_lock(), we must
-	 * enter / leave complex_mode.
+	 * enter complex_mode.
 	 */
 	complexmode_enter(sma);
 
@@ -2231,8 +2238,6 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 		   sem_otime,
 		   sma->sem_ctime);
 
-	complexmode_tryleave(sma);
-
 	return 0;
 }
 #endif
-- 
2.5.5

  reply	other threads:[~2016-06-18 20:03 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-15  5:23 linux-next: manual merge of the akpm-current tree with the tip tree Stephen Rothwell
2016-06-18 19:39 ` Manfred Spraul
2016-06-18 20:02 ` [PATCH 1/2] ipc/sem.c: Fix complex_count vs. simple op race Manfred Spraul
2016-06-18 20:02   ` Manfred Spraul [this message]
2016-06-21 20:29     ` [PATCH 2/2] ipc/sem: sem_lock with hysteresis Davidlohr Bueso
2016-06-25 17:37       ` Manfred Spraul
2016-06-28 17:54         ` Davidlohr Bueso
2016-06-20 23:04   ` [PATCH 1/2] ipc/sem.c: Fix complex_count vs. simple op race Andrew Morton
2016-06-23 18:55     ` Manfred Spraul
2016-06-21  0:30   ` Davidlohr Bueso
2016-06-23 19:22     ` Manfred Spraul
2016-06-28  5:27       ` Davidlohr Bueso
2016-06-30 19:28         ` Manfred Spraul
2016-07-01 16:52           ` Davidlohr Bueso
2016-06-25 17:37 [PATCH 0/2] ipc/sem.c: sem_lock fixes Manfred Spraul
2016-06-25 17:37 ` [PATCH 1/2] ipc/sem.c: Fix complex_count vs. simple op race Manfred Spraul
2016-06-25 17:37   ` [PATCH 2/2] ipc/sem: sem_lock with hysteresis Manfred Spraul

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1466280142-19741-2-git-send-email-manfred@colorfullife.com \
    --to=manfred@colorfullife.com \
    --cc=1vier1@web.de \
    --cc=akpm@linux-foundation.org \
    --cc=dave@stgolabs.net \
    --cc=felixh@informatik.uni-bremen.de \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-next@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=sfr@canb.auug.org.au \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).