All of lore.kernel.org
 help / color / mirror / Atom feed
From: Davidlohr Bueso <dave@stgolabs.net>
To: akpm@linux-foundation.org
Cc: manfred@colorfullife.com, dave@stgolabs.net,
	linux-kernel@vger.kernel.org, Davidlohr Bueso <dbueso@suse.de>
Subject: [PATCH 3/5] ipc/sem: optimize perform_atomic_semop()
Date: Mon, 12 Sep 2016 04:53:34 -0700	[thread overview]
Message-ID: <1473681216-20025-4-git-send-email-dave@stgolabs.net> (raw)
In-Reply-To: <1473681216-20025-1-git-send-email-dave@stgolabs.net>

This is the main workhorse that deals with semop user calls
such that the waitforzero or semval update operations, on the
set, can complete or not as the sma currently stands. Currently,
the set is iterated twice (setting semval, then backwards for
the sempid value). Slowpaths, and particularly SEM_UNDO calls,
must undo any altered sem when it is detected that the caller
must block or has errored-out.

With larger sets, there can occur situations where this involves
a lot of cycles and can obviously be a suboptimal use of cached
resources in shared memory. I.e., discarding the caches of CPUs that
are also calling semop and have the sembuf cached (and can complete),
while the current lock holder doing the semop will block, error,
or do a waitforzero operation.

This patch proposes still iterating the set twice, but the first
scan is read-only, and we perform the actual updates afterward,
once we know that the call will succeed. In order to not suffer
from the overhead of dealing with sops that act on the same sem_num,
such (rare) cases use perform_atomic_semop_slow(), which is exactly
what we have now. Duplicates are detected before grabbing sem_lock,
using a simple 64-bit variable to enable the sem_num-th bit.
Of course, this means that semop calls with a sem_num larger than
64 (SEMOPM_FAST, for now, as this is really about the nsops), will
take the _slow() alternative; but many real-world workloads only
work on a handful of semaphores in a given set, thus good enough
for the common case.

In addition add some comments on when we expect the caller
to block.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 ipc/sem.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 82 insertions(+), 7 deletions(-)

diff --git a/ipc/sem.c b/ipc/sem.c
index 86467b5b78ad..d9c743ac17ff 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -115,7 +115,8 @@ struct sem_queue {
 	struct sembuf		*sops;	 /* array of pending operations */
 	struct sembuf		*blocking; /* the operation that blocked */
 	int			nsops;	 /* number of operations */
-	int			alter;	 /* does *sops alter the array? */
+	bool			alter;   /* does *sops alter the array? */
+	bool                    dupsop;	 /* more than one sop on the same sem_num */
 };
 
 /* Each task has a list of undo requests. They are executed automatically
@@ -595,7 +596,8 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
  * Returns 1 if the operation is impossible, the caller must sleep.
  * Negative values are error codes.
  */
-static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
+static int perform_atomic_semop_slow(struct sem_array *sma,
+				     struct sem_queue *q)
 {
 	int result, sem_op, nsops, pid;
 	struct sembuf *sop;
@@ -666,6 +668,72 @@ undo:
 	return result;
 }
 
+/*
+ * Apply all of q->sops to sma, or none of them.  Returns 0 on success,
+ * 1 if the caller must sleep (q->blocking is the operation at fault),
+ * -EAGAIN if that operation has IPC_NOWAIT set, or -ERANGE if a semval
+ * or undo adjustment would leave its valid range.  Sets flagged with
+ * q->dupsop are handed to perform_atomic_semop_slow(): the read-only
+ * scan tests every operation against the current, unmodified semval.
+ */
+static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
+{
+	struct sembuf *sops = q->sops, *sop;
+	struct sem_undo *un = q->undo;
+	int nsops = q->nsops;
+	int result, sem_op;
+	struct sem *curr;
+
+	if (q->dupsop)
+		return perform_atomic_semop_slow(sma, q);
+
+	/*
+	 * First pass, read-only: verify that the entire operation can
+	 * succeed, so that we never write to shared memory only to undo
+	 * the changes before blocking or failing.
+	 */
+	for (sop = sops; sop < sops + nsops; sop++) {
+		curr = sma->sem_base + sop->sem_num;
+		sem_op = sop->sem_op;
+		result = curr->semval;
+
+		if (!sem_op && result)
+			goto would_block; /* wait-for-zero */
+
+		result += sem_op;
+		if (result < 0)
+			goto would_block;
+
+		if (result > SEMVMX)
+			return -ERANGE;
+
+		if (sop->sem_flg & SEM_UNDO) {
+			int undo = un->semadj[sop->sem_num] - sem_op;
+
+			/* Exceeding the undo range is an error. */
+			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
+				return -ERANGE;
+		}
+	}
+
+	/* Second pass: every check above passed, so commit the updates. */
+	for (sop = sops; sop < sops + nsops; sop++) {
+		curr = sma->sem_base + sop->sem_num;
+
+		if (sop->sem_flg & SEM_UNDO)
+			un->semadj[sop->sem_num] -= sop->sem_op;
+
+		curr->semval += sop->sem_op;
+		curr->sempid = q->pid;
+	}
+
+	return 0;
+
+would_block:
+	q->blocking = sop;
+	return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
+}
+
 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
 					     struct wake_q_head *wake_q)
 {
@@ -1713,9 +1781,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	struct sembuf fast_sops[SEMOPM_FAST];
 	struct sembuf *sops = fast_sops, *sop;
 	struct sem_undo *un;
-	int undos = 0, alter = 0, max, locknum;
+	int max, locknum;
+	bool undos = false, alter = false, dupsop = false;
 	struct sem_queue queue;
-	unsigned long jiffies_left = 0;
+	unsigned long dup = 0, jiffies_left = 0;
 	struct ipc_namespace *ns;
 
 	ns = current->nsproxy->ipc_ns;
@@ -1751,12 +1820,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		if (sop->sem_num >= max)
 			max = sop->sem_num;
 		if (sop->sem_flg & SEM_UNDO)
-			undos = 1;
+			undos = true;
 		if (sop->sem_op != 0)
-			alter = 1;
+			alter = true;
+		/* 'dup' only tracks BITS_PER_LONG sems; the rest go _slow(). */
+		if (sop->sem_num >= BITS_PER_LONG ||
+		    (dup & (1UL << sop->sem_num)))
+			dupsop = true;
+		else
+			dup |= 1UL << sop->sem_num;
 	}
 
-
 	if (undos) {
 		/* On success, find_alloc_undo takes the rcu_read_lock */
 		un = find_alloc_undo(ns, semid);
@@ -1821,6 +1895,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	queue.undo = un;
 	queue.pid = task_tgid_vnr(current);
 	queue.alter = alter;
+	queue.dupsop = dupsop;
 
 	error = perform_atomic_semop(sma, &queue);
 	if (error <= 0) { /* non-blocking path */
-- 
2.6.6

  parent reply	other threads:[~2016-09-12 11:54 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-12 11:53 [PATCH -next 0/5] ipc/sem: semop(2) improvements Davidlohr Bueso
2016-09-12 11:53 ` [PATCH 1/5] ipc/sem: do not call wake_sem_queue_do() prematurely Davidlohr Bueso
2016-09-13  4:17   ` Manfred Spraul
2016-09-13  8:14     ` Davidlohr Bueso
2016-09-12 11:53 ` [PATCH 2/5] ipc/sem: rework task wakeups Davidlohr Bueso
2016-09-13 18:04   ` Manfred Spraul
2016-09-14 15:45     ` Davidlohr Bueso
2016-09-18 14:37   ` Manfred Spraul
2016-09-18 18:26     ` Davidlohr Bueso
2016-09-12 11:53 ` Davidlohr Bueso [this message]
2016-09-12 17:56   ` [PATCH 3/5] ipc/sem: optimize perform_atomic_semop() Manfred Spraul
2016-09-13  8:33     ` Davidlohr Bueso
2016-09-19  4:41       ` Manfred Spraul
2016-09-12 11:53 ` [PATCH 4/5] ipc/sem: explicitly inline check_restart Davidlohr Bueso
2016-09-12 11:53 ` [PATCH 5/5] ipc/sem: use proper list api for pending_list wakeups Davidlohr Bueso
2016-09-18 17:51   ` Manfred Spraul
2016-09-18 19:11 [PATCH -next v2 0/5] ipc/sem: semop(2) improvements Davidlohr Bueso
2016-09-18 19:11 ` [PATCH 3/5] ipc/sem: optimize perform_atomic_semop() Davidlohr Bueso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1473681216-20025-4-git-send-email-dave@stgolabs.net \
    --to=dave@stgolabs.net \
    --cc=akpm@linux-foundation.org \
    --cc=dbueso@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=manfred@colorfullife.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.