linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Linus Torvalds <torvalds@linux-foundation.org>
To: Tim Chen <tim.c.chen@linux.intel.com>
Cc: "Liang, Kan" <kan.liang@intel.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Mel Gorman <mgorman@suse.de>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@elte.hu>, Andi Kleen <ak@linux.intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>, Jan Kara <jack@suse.cz>,
	linux-mm <linux-mm@kvack.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 1/2] sched/wait: Break up long wake list walk
Date: Wed, 23 Aug 2017 16:30:51 -0700	[thread overview]
Message-ID: <CA+55aFzxisTJS+Z7q+Dp9oRgvMpXEQRedYFu7-k_YXEE-=htgA@mail.gmail.com> (raw)
In-Reply-To: <CA+55aFzbom=qFc2pYk07XhiMBn083EXugSUHmSVbTuu8eJtHVQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1192 bytes --]

On Wed, Aug 23, 2017 at 11:17 AM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Wed, Aug 23, 2017 at 8:58 AM, Tim Chen <tim.c.chen@linux.intel.com> wrote:
>>
>> Will you still consider the original patch as a fail safe mechanism?
>
> I don't think we have much choice, although I would *really* want to
> get this root-caused rather than just papering over the symptoms.

Oh well. Apparently we're not making progress on that, so I looked at
the patch again.

Can we fix it up a bit? In particular, the "bookmark_wake_function()"
thing added no value, and definitely shouldn't have been exported.
Just use NULL instead.

And the WAITQUEUE_WALK_BREAK_CNT thing should be internal to
__wake_up_common(), not in some common header file. Again, there's no
value in exporting it to anybody else.

And doing

                if (curr->flags & WQ_FLAG_BOOKMARK)

looks odd, when we just did

                unsigned flags = curr->flags;

one line earlier, so that can be just simplified.

So can you test that simplified version of the patch? I'm attaching my
suggested edited patch, but you may just want to do those changes
directly to your tree instead.

Hmm?

               Linus

[-- Attachment #2: patch.diff --]
[-- Type: text/plain, Size: 5018 bytes --]

 include/linux/wait.h |  1 +
 kernel/sched/wait.c  | 74 ++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index dc19880c02f5..78401ef02d29 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -18,6 +18,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int
 /* wait_queue_entry::flags */
 #define WQ_FLAG_EXCLUSIVE	0x01
 #define WQ_FLAG_WOKEN		0x02
+#define WQ_FLAG_BOOKMARK	0x04
 
 /*
  * A single wait-queue entry structure:
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 17f11c6b0a9f..789dc24a323d 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -53,6 +53,12 @@ void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry
 }
 EXPORT_SYMBOL(remove_wait_queue);
 
+/*
+ * Scan threshold to break wait queue walk.
+ * This allows a waker to take a break from holding the
+ * wait queue lock during the wait queue walk.
+ */
+#define WAITQUEUE_WALK_BREAK_CNT 64
 
 /*
  * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
@@ -63,17 +69,64 @@ EXPORT_SYMBOL(remove_wait_queue);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
-			int nr_exclusive, int wake_flags, void *key)
+static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
+			int nr_exclusive, int wake_flags, void *key,
+			wait_queue_entry_t *bookmark)
 {
 	wait_queue_entry_t *curr, *next;
+	int cnt = 0;
+
+	if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
+		curr = list_next_entry(bookmark, entry);
 
-	list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
+		list_del(&bookmark->entry);
+		bookmark->flags = 0;
+	} else
+		curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);
+
+	if (&curr->entry == &wq_head->head)
+		return nr_exclusive;
+
+	list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
 		unsigned flags = curr->flags;
 
+		if (flags & WQ_FLAG_BOOKMARK)
+			continue;
+
 		if (curr->func(curr, mode, wake_flags, key) &&
 				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
 			break;
+
+		if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
+				(&next->entry != &wq_head->head)) {
+			bookmark->flags = WQ_FLAG_BOOKMARK;
+			list_add_tail(&bookmark->entry, &next->entry);
+			break;
+		}
+	}
+	return nr_exclusive;
+}
+
+static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
+			int nr_exclusive, int wake_flags, void *key)
+{
+	unsigned long flags;
+	wait_queue_entry_t bookmark;
+
+	bookmark.flags = 0;
+	bookmark.private = NULL;
+	bookmark.func = NULL;
+	INIT_LIST_HEAD(&bookmark.entry);
+
+	spin_lock_irqsave(&wq_head->lock, flags);
+	nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
+
+	while (bookmark.flags & WQ_FLAG_BOOKMARK) {
+		spin_lock_irqsave(&wq_head->lock, flags);
+		nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
+						wake_flags, key, &bookmark);
+		spin_unlock_irqrestore(&wq_head->lock, flags);
 	}
 }
 
@@ -90,11 +143,7 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&wq_head->lock, flags);
-	__wake_up_common(wq_head, mode, nr_exclusive, 0, key);
-	spin_unlock_irqrestore(&wq_head->lock, flags);
+	__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
 }
 EXPORT_SYMBOL(__wake_up);
 
@@ -103,13 +152,13 @@ EXPORT_SYMBOL(__wake_up);
  */
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
 {
-	__wake_up_common(wq_head, mode, nr, 0, NULL);
+	__wake_up_common(wq_head, mode, nr, 0, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
 {
-	__wake_up_common(wq_head, mode, 1, 0, key);
+	__wake_up_common(wq_head, mode, 1, 0, key, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
@@ -133,7 +182,6 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
-	unsigned long flags;
 	int wake_flags = 1; /* XXX WF_SYNC */
 
 	if (unlikely(!wq_head))
@@ -142,9 +190,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
 	if (unlikely(nr_exclusive != 1))
 		wake_flags = 0;
 
-	spin_lock_irqsave(&wq_head->lock, flags);
-	__wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key);
-	spin_unlock_irqrestore(&wq_head->lock, flags);
+	__wake_up_common_lock(wq_head, mode, nr_exclusive, wake_flags, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync_key);
 

  parent reply	other threads:[~2017-08-23 23:30 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-15  0:52 [PATCH 1/2] sched/wait: Break up long wake list walk Tim Chen
2017-08-15  0:52 ` [PATCH 2/2] sched/wait: Introduce lock breaker in wake_up_page_bit Tim Chen
2017-08-15  1:48 ` [PATCH 1/2] sched/wait: Break up long wake list walk Linus Torvalds
2017-08-15  2:27   ` Andi Kleen
2017-08-15  2:52     ` Linus Torvalds
2017-08-15  3:15       ` Andi Kleen
2017-08-15  3:28         ` Linus Torvalds
2017-08-15 19:05           ` Tim Chen
2017-08-15 19:41             ` Linus Torvalds
2017-08-15 19:47               ` Linus Torvalds
2017-08-15 22:47           ` Davidlohr Bueso
2017-08-15 22:56             ` Linus Torvalds
2017-08-15 22:57               ` Linus Torvalds
2017-08-15 23:50                 ` Linus Torvalds
2017-08-16 23:22                   ` Eric W. Biederman
2017-08-17 16:17   ` Liang, Kan
2017-08-17 16:25     ` Linus Torvalds
2017-08-17 20:18       ` Liang, Kan
2017-08-17 20:44         ` Linus Torvalds
2017-08-18 12:23           ` Mel Gorman
2017-08-18 14:20             ` Liang, Kan
2017-08-18 14:46               ` Mel Gorman
2017-08-18 16:36                 ` Tim Chen
2017-08-18 16:45                   ` Andi Kleen
2017-08-18 16:53                 ` Liang, Kan
2017-08-18 17:48                   ` Linus Torvalds
2017-08-18 18:54                     ` Mel Gorman
2017-08-18 19:14                       ` Linus Torvalds
2017-08-18 19:58                         ` Andi Kleen
2017-08-18 20:10                           ` Linus Torvalds
2017-08-21 18:32                         ` Mel Gorman
2017-08-21 18:56                           ` Liang, Kan
2017-08-22 17:23                             ` Liang, Kan
2017-08-22 18:19                               ` Linus Torvalds
2017-08-22 18:25                                 ` Linus Torvalds
2017-08-22 18:56                                 ` Peter Zijlstra
2017-08-22 19:15                                   ` Linus Torvalds
2017-08-22 19:08                                 ` Peter Zijlstra
2017-08-22 19:30                                   ` Linus Torvalds
2017-08-22 19:37                                     ` Andi Kleen
2017-08-22 21:08                                       ` Christopher Lameter
2017-08-22 21:24                                         ` Andi Kleen
2017-08-22 22:52                                           ` Linus Torvalds
2017-08-22 23:19                                             ` Linus Torvalds
2017-08-23 14:51                                             ` Liang, Kan
2017-08-22 19:55                                 ` Liang, Kan
2017-08-22 20:42                                   ` Linus Torvalds
2017-08-22 20:53                                     ` Peter Zijlstra
2017-08-22 20:58                                       ` Linus Torvalds
2017-08-23 14:49                                     ` Liang, Kan
2017-08-23 15:58                                       ` Tim Chen
2017-08-23 18:17                                         ` Linus Torvalds
2017-08-23 20:55                                           ` Liang, Kan
2017-08-23 23:30                                           ` Linus Torvalds [this message]
2017-08-24 17:49                                             ` Tim Chen
2017-08-24 18:16                                               ` Linus Torvalds
2017-08-24 20:44                                                 ` Mel Gorman
2017-08-25 16:44                                                   ` Tim Chen
2017-08-23 16:04                                 ` Mel Gorman
2017-08-18 20:05                     ` Andi Kleen
2017-08-18 20:29                       ` Linus Torvalds
2017-08-18 20:29                     ` Liang, Kan
2017-08-18 20:34                       ` Linus Torvalds
2017-08-18 16:55             ` Linus Torvalds
2017-08-18 13:06           ` Liang, Kan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+55aFzxisTJS+Z7q+Dp9oRgvMpXEQRedYFu7-k_YXEE-=htgA@mail.gmail.com' \
    --to=torvalds@linux-foundation.org \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=jack@suse.cz \
    --cc=kan.liang@intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=tim.c.chen@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).