linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Oleg Nesterov <oleg@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>, Al Viro <viro@zeniv.linux.org.uk>
Cc: Dave Jones <davej@redhat.com>,
	Linux Kernel <linux-kernel@vger.kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	rostedt <rostedt@goodmis.org>, dhowells <dhowells@redhat.com>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 1/4] task_work: make task_work_add() lockless
Date: Sun, 26 Aug 2012 21:12:09 +0200	[thread overview]
Message-ID: <20120826191209.GA4221@redhat.com> (raw)
In-Reply-To: <20120826191132.GA3743@redhat.com>

Change task_works to use llist-like code to avoid pi_lock
in task_work_add(), this makes it useable under rq->lock.

task_work_cancel() and task_work_run() still use pi_lock
to synchronize with each other.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/task_work.c |   95 ++++++++++++++++++++++++++-------------------------
 1 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/kernel/task_work.c b/kernel/task_work.c
index d320d44..f13ec0b 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -3,25 +3,18 @@
 #include <linux/tracehook.h>
 
 int
-task_work_add(struct task_struct *task, struct callback_head *twork, bool notify)
+task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
 {
-	struct callback_head *last, *first;
-	unsigned long flags;
-
+	struct callback_head *head;
 	/*
 	 * Not inserting the new work if the task has already passed
 	 * exit_task_work() is the responisbility of callers.
 	 */
-	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	last = task->task_works;
-	first = last ? last->next : twork;
-	twork->next = first;
-	if (last)
-		last->next = twork;
-	task->task_works = twork;
-	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	do {
+		head = ACCESS_ONCE(task->task_works);
+		work->next = head;
+	} while (cmpxchg(&task->task_works, head, work) != head);
 
-	/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
 	if (notify)
 		set_notify_resume(task);
 	return 0;
@@ -30,52 +23,60 @@ task_work_add(struct task_struct *task, struct callback_head *twork, bool notify
 struct callback_head *
 task_work_cancel(struct task_struct *task, task_work_func_t func)
 {
+	struct callback_head **pprev = &task->task_works;
+	struct callback_head *work = NULL;
 	unsigned long flags;
-	struct callback_head *last, *res = NULL;
-
+	/*
+	 * If cmpxchg() fails we continue without updating pprev.
+	 * Either we raced with task_work_add() which added the
+	 * new entry before this work, we will find it again. Or
+	 * we raced with task_work_run(), *pprev == NULL.
+	 */
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	last = task->task_works;
-	if (last) {
-		struct callback_head *q = last, *p = q->next;
-		while (1) {
-			if (p->func == func) {
-				q->next = p->next;
-				if (p == last)
-					task->task_works = q == p ? NULL : q;
-				res = p;
-				break;
-			}
-			if (p == last)
-				break;
-			q = p;
-			p = q->next;
-		}
+	while ((work = ACCESS_ONCE(*pprev))) {
+		read_barrier_depends();
+		if (work->func != func)
+			pprev = &work->next;
+		else if (cmpxchg(pprev, work, work->next) == work)
+			break;
 	}
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-	return res;
+
+	return work;
 }
 
 void task_work_run(void)
 {
 	struct task_struct *task = current;
-	struct callback_head *p, *q;
+	struct callback_head *work, *head, *next;
 
-	while (1) {
-		raw_spin_lock_irq(&task->pi_lock);
-		p = task->task_works;
-		task->task_works = NULL;
-		raw_spin_unlock_irq(&task->pi_lock);
+	for (;;) {
+		work = xchg(&task->task_works, NULL);
+		if (!work)
+			break;
+		/*
+		 * Synchronize with task_work_cancel(). It can't remove
+		 * the first entry == work, cmpxchg(task_works) should
+		 * fail, but it can play with *work and other entries.
+		 */
+		raw_spin_unlock_wait(&task->pi_lock);
+		smp_mb();
 
-		if (unlikely(!p))
-			return;
+		/* Reverse the list to run the works in fifo order */
+		head = NULL;
+		do {
+			next = work->next;
+			work->next = head;
+			head = work;
+			work = next;
+		} while (work);
 
-		q = p->next; /* head */
-		p->next = NULL; /* cut it */
-		while (q) {
-			p = q->next;
-			q->func(q);
-			q = p;
+		work = head;
+		do {
+			next = work->next;
+			work->func(work);
+			work = next;
 			cond_resched();
-		}
+		} while (work);
 	}
 }
-- 
1.5.5.1


  reply	other threads:[~2012-08-26 19:10 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-24 20:36 lockdep trace from posix timers Dave Jones
2012-07-27 16:20 ` Dave Jones
2012-08-16 12:54   ` Ming Lei
2012-08-16 14:03     ` Dave Jones
2012-08-16 18:07 ` Peter Zijlstra
2012-08-17 15:14   ` Oleg Nesterov
2012-08-17 15:17     ` Oleg Nesterov
2012-08-17 16:40       ` task_work_add() should not succeed unconditionally (Was: lockdep trace from posix timers) Oleg Nesterov
2012-08-20  7:15     ` lockdep trace from posix timers Peter Zijlstra
2012-08-20 11:44       ` Peter Zijlstra
2012-08-20 11:46         ` Peter Zijlstra
2012-08-20 11:50         ` Peter Zijlstra
2012-08-20 12:19           ` Steven Rostedt
2012-08-20 12:20             ` Peter Zijlstra
2012-08-20 14:59         ` Oleg Nesterov
2012-08-20 15:10           ` Peter Zijlstra
2012-08-20 15:27             ` Peter Zijlstra
2012-08-20 15:32               ` Oleg Nesterov
2012-08-20 15:46                 ` Peter Zijlstra
2012-08-20 15:58                   ` Oleg Nesterov
2012-08-20 16:03                     ` Peter Zijlstra
2012-08-20 15:05         ` Oleg Nesterov
2012-08-20 15:12           ` Peter Zijlstra
2012-08-20 15:41             ` Oleg Nesterov
2012-08-20 15:56               ` Peter Zijlstra
2012-08-20 16:10                 ` Oleg Nesterov
2012-08-20 16:19                   ` Peter Zijlstra
2012-08-20 16:23                     ` Oleg Nesterov
2012-08-21 18:27                       ` Oleg Nesterov
2012-08-21 18:34                         ` Oleg Nesterov
2012-08-24 18:56                           ` Oleg Nesterov
2012-08-26 19:11                             ` [PATCH 0/4] (Was: lockdep trace from posix timers) Oleg Nesterov
2012-08-26 19:12                               ` Oleg Nesterov [this message]
2012-09-14  6:08                                 ` [tip:core/urgent] task_work: Make task_work_add() lockless tip-bot for Oleg Nesterov
2012-09-24 19:27                                 ` [PATCH 1/4] task_work: make " Geert Uytterhoeven
2012-09-24 20:37                                   ` Oleg Nesterov
2012-08-26 19:12                               ` [PATCH 2/4] task_work: task_work_add() should not succeed after exit_task_work() Oleg Nesterov
2012-09-14  6:09                                 ` [tip:core/urgent] " tip-bot for Oleg Nesterov
2012-08-26 19:12                               ` [PATCH 3/4] task_work: revert d35abdb2 "hold task_lock around checks in keyctl" Oleg Nesterov
2012-09-14  6:10                                 ` [tip:core/urgent] task_work: Revert " hold " tip-bot for Oleg Nesterov
2012-08-26 19:12                               ` [PATCH 4/4] task_work: simplify the usage in ptrace_notify() and get_signal_to_deliver() Oleg Nesterov
2012-09-14  6:11                                 ` [tip:core/urgent] task_work: Simplify " tip-bot for Oleg Nesterov
2012-09-06 18:01                               ` [PATCH 0/4] (Was: lockdep trace from posix timers) Oleg Nesterov
2012-09-06 18:35                                 ` Peter Zijlstra
2012-09-07 13:13                                   ` Oleg Nesterov
2012-08-28 16:29                             ` lockdep trace from posix timers Peter Zijlstra
2012-08-28 17:01                               ` Oleg Nesterov
2012-08-28 17:12                                 ` Oleg Nesterov
2012-08-28 17:28                                 ` Peter Zijlstra
2012-08-29 15:25                                   ` Oleg Nesterov
2012-08-20 14:55       ` Oleg Nesterov
2012-08-20 15:43       ` Oleg Nesterov
2012-08-20 15:48         ` Peter Zijlstra
2012-08-20 15:58           ` Oleg Nesterov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120826191209.GA4221@redhat.com \
    --to=oleg@redhat.com \
    --cc=davej@redhat.com \
    --cc=dhowells@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).