linux-rt-users.vger.kernel.org archive mirror
* [RFC][PATCH RT 0/7] Revert of simple work, and backport workqueue rework
@ 2019-09-20 21:53 Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 1/7] revert-aio Steven Rostedt
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Steven Rostedt @ 2019-09-20 21:53 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Thomas Gleixner, Carsten Emde, Sebastian Andrzej Siewior,
	John Kacur, Paul Gortmaker, Julia Cartwright, Daniel Wagner,
	tom.zanussi

To be able to backport some of the fixes done in upstream RT, I need
to revert the simple work code and incorporate the workqueue rework
that was done in rt-devel. I originally thought this crashed under
stress testing (as I reported in the stable meeting), but it turned out
that the system had booted a different kernel with known issues.
4.19-rt with these patches has passed all my tests. But before posting
an rc1, I was hoping to get some people to look it over and perhaps
even test it a bit to make sure there are no issues here.

The reason I'm doing this is that it goes beyond the stable scope
for 4.19-rt, but I believe it is still necessary.

-- Steve



Daniel Wagner (1):
      thermal: Defer thermal wakeups to threads

Sebastian Andrzej Siewior (3):
      fs/aio: simple simple work
      block: blk-mq: move blk_queue_usage_counter_release() into process context
      workqueue: rework

Steven Rostedt (VMware) (3):
      revert-aio
      revert-thermal
      revert-block

----
 block/blk-core.c                       |  10 +-
 drivers/block/loop.c                   |   2 +-
 drivers/spi/spi-rockchip.c             |   1 -
 drivers/thermal/x86_pkg_temp_thermal.c |  52 +-----
 fs/aio.c                               |  12 +-
 include/linux/blk-cgroup.h             |   2 +-
 include/linux/blkdev.h                 |   4 +-
 include/linux/interrupt.h              |   5 -
 include/linux/kthread-cgroup.h         |  17 --
 include/linux/kthread.h                |  15 +-
 include/linux/swait.h                  |  14 ++
 include/linux/workqueue.h              |   4 -
 init/main.c                            |   1 -
 kernel/irq/manage.c                    |  36 +---
 kernel/kthread.c                       |  14 --
 kernel/sched/core.c                    |   1 +
 kernel/time/hrtimer.c                  |  24 ---
 kernel/workqueue.c                     | 300 ++++++++++++++-------------------
 18 files changed, 168 insertions(+), 346 deletions(-)
 delete mode 100644 include/linux/kthread-cgroup.h


* [RFC][PATCH RT 1/7] revert-aio
  2019-09-20 21:53 [RFC][PATCH RT 0/7] Revert of simple work, and backport workqueue rework Steven Rostedt
@ 2019-09-20 21:53 ` Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 2/7] fs/aio: simple simple work Steven Rostedt
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Steven Rostedt @ 2019-09-20 21:53 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Thomas Gleixner, Carsten Emde, Sebastian Andrzej Siewior,
	John Kacur, Paul Gortmaker, Julia Cartwright, Daniel Wagner,
	tom.zanussi

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

revert: fs/aio: simple simple work

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 fs/aio.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 16dcf8521c2c..911e23087dfb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -42,7 +42,6 @@
 #include <linux/ramfs.h>
 #include <linux/percpu-refcount.h>
 #include <linux/mount.h>
-#include <linux/swork.h>
 
 #include <asm/kmap_types.h>
 #include <linux/uaccess.h>
@@ -122,7 +121,6 @@ struct kioctx {
 	long			nr_pages;
 
 	struct rcu_work		free_rwork;	/* see free_ioctx() */
-	struct swork_event	free_swork;	/* see free_ioctx() */
 
 	/*
 	 * signals when all in-flight requests are done
@@ -267,7 +265,6 @@ static int __init aio_setup(void)
 		.mount		= aio_mount,
 		.kill_sb	= kill_anon_super,
 	};
-	BUG_ON(swork_get());
 	aio_mnt = kern_mount(&aio_fs);
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");
@@ -609,9 +606,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  * now it's safe to cancel any that need to be.
  */
-static void free_ioctx_users_work(struct swork_event *sev)
+static void free_ioctx_users(struct percpu_ref *ref)
 {
-	struct kioctx *ctx = container_of(sev, struct kioctx, free_swork);
+	struct kioctx *ctx = container_of(ref, struct kioctx, users);
 	struct aio_kiocb *req;
 
 	spin_lock_irq(&ctx->ctx_lock);
@@ -629,14 +626,6 @@ static void free_ioctx_users_work(struct swork_event *sev)
 	percpu_ref_put(&ctx->reqs);
 }
 
-static void free_ioctx_users(struct percpu_ref *ref)
-{
-	struct kioctx *ctx = container_of(ref, struct kioctx, users);
-
-	INIT_SWORK(&ctx->free_swork, free_ioctx_users_work);
-	swork_queue(&ctx->free_swork);
-}
-
 static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 {
 	unsigned i, new_nr;
-- 
2.20.1




* [RFC][PATCH RT 2/7] fs/aio: simple simple work
  2019-09-20 21:53 [RFC][PATCH RT 0/7] Revert of simple work, and backport workqueue rework Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 1/7] revert-aio Steven Rostedt
@ 2019-09-20 21:53 ` Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 3/7] revert-thermal Steven Rostedt
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Steven Rostedt @ 2019-09-20 21:53 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Thomas Gleixner, Carsten Emde, Sebastian Andrzej Siewior,
	John Kacur, Paul Gortmaker, Julia Cartwright, Daniel Wagner,
	tom.zanussi, Mike Galbraith, Benjamin LaHaise

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

[ Upstream commit 1a142116f6435ef070ecebb66d2d599507c10601 ]

|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768
|in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2
|2 locks held by rcuos/2/26:
| #0:  (rcu_callback){.+.+..}, at: [<ffffffff810b1a12>] rcu_nocb_kthread+0x1e2/0x380
| #1:  (rcu_read_lock_sched){.+.+..}, at: [<ffffffff812acd26>] percpu_ref_kill_rcu+0xa6/0x1c0
|Preemption disabled at:[<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
|Call Trace:
| [<ffffffff81582e9e>] dump_stack+0x4e/0x9c
| [<ffffffff81077aeb>] __might_sleep+0xfb/0x170
| [<ffffffff81589304>] rt_spin_lock+0x24/0x70
| [<ffffffff811c5790>] free_ioctx_users+0x30/0x130
| [<ffffffff812ace34>] percpu_ref_kill_rcu+0x1b4/0x1c0
| [<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
| [<ffffffff8106e046>] kthread+0xd6/0xf0
| [<ffffffff81591eec>] ret_from_fork+0x7c/0xb0

Replace this with the preempt_disable()-friendly swork.
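
For illustration only (not part of the patch): a minimal sketch of this
deferral pattern using the stock kthread_worker API, with hypothetical
my_* names. The backport below queues to the series' global worker via
kthread_schedule_work() instead of a dedicated worker.

/*
 * Sketch: defer the percpu_ref release work, which may run from an RCU
 * callback (atomic context), to a kthread where sleeping locks are fine.
 */
#include <linux/kthread.h>
#include <linux/percpu-refcount.h>
#include <linux/spinlock.h>

struct my_ctx {
	struct percpu_ref	users;
	spinlock_t		ctx_lock;	/* an rtmutex on RT */
	struct kthread_work	free_work;
};

static struct kthread_worker *my_worker;	/* from kthread_create_worker() */

static void my_free_users_work(struct kthread_work *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx, free_work);

	spin_lock_irq(&ctx->ctx_lock);		/* may sleep on RT: fine here */
	/* ... cancel in-flight requests ... */
	spin_unlock_irq(&ctx->ctx_lock);
}

/* percpu_ref release callback, possibly invoked from an RCU callback */
static void my_free_users(struct percpu_ref *ref)
{
	struct my_ctx *ctx = container_of(ref, struct my_ctx, users);

	kthread_init_work(&ctx->free_work, my_free_users_work);
	kthread_queue_work(my_worker, &ctx->free_work);
}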

Reported-By: Mike Galbraith <umgwanakikbuti@gmail.com>
Suggested-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 fs/aio.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 911e23087dfb..0c613d805bf1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -121,6 +121,7 @@ struct kioctx {
 	long			nr_pages;
 
 	struct rcu_work		free_rwork;	/* see free_ioctx() */
+	struct kthread_work	free_kwork;	/* see free_ioctx() */
 
 	/*
 	 * signals when all in-flight requests are done
@@ -606,9 +607,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  * now it's safe to cancel any that need to be.
  */
-static void free_ioctx_users(struct percpu_ref *ref)
+static void free_ioctx_users_work(struct kthread_work *work)
 {
-	struct kioctx *ctx = container_of(ref, struct kioctx, users);
+	struct kioctx *ctx = container_of(work, struct kioctx, free_kwork);
 	struct aio_kiocb *req;
 
 	spin_lock_irq(&ctx->ctx_lock);
@@ -626,6 +627,14 @@ static void free_ioctx_users(struct percpu_ref *ref)
 	percpu_ref_put(&ctx->reqs);
 }
 
+static void free_ioctx_users(struct percpu_ref *ref)
+{
+	struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+	kthread_init_work(&ctx->free_kwork, free_ioctx_users_work);
+	kthread_schedule_work(&ctx->free_kwork);
+}
+
 static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 {
 	unsigned i, new_nr;
-- 
2.20.1




* [RFC][PATCH RT 3/7] revert-thermal
  2019-09-20 21:53 [RFC][PATCH RT 0/7] Revert of simple work, and backport workqueue rework Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 1/7] revert-aio Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 2/7] fs/aio: simple simple work Steven Rostedt
@ 2019-09-20 21:53 ` Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 4/7] thermal: Defer thermal wakeups to threads Steven Rostedt
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Steven Rostedt @ 2019-09-20 21:53 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Thomas Gleixner, Carsten Emde, Sebastian Andrzej Siewior,
	John Kacur, Paul Gortmaker, Julia Cartwright, Daniel Wagner,
	tom.zanussi

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Revert: thermal: Defer thermal wakeups to threads

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 drivers/thermal/x86_pkg_temp_thermal.c | 52 ++------------------------
 1 file changed, 3 insertions(+), 49 deletions(-)

diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index a5991cbb408f..1ef937d799e4 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -29,7 +29,6 @@
 #include <linux/pm.h>
 #include <linux/thermal.h>
 #include <linux/debugfs.h>
-#include <linux/swork.h>
 #include <asm/cpu_device_id.h>
 #include <asm/mce.h>
 
@@ -330,7 +329,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
 	schedule_delayed_work_on(cpu, work, ms);
 }
 
-static void pkg_thermal_notify_work(struct swork_event *event)
+static int pkg_thermal_notify(u64 msr_val)
 {
 	int cpu = smp_processor_id();
 	struct pkg_device *pkgdev;
@@ -349,47 +348,9 @@ static void pkg_thermal_notify_work(struct swork_event *event)
 	}
 
 	spin_unlock_irqrestore(&pkg_temp_lock, flags);
-}
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-static struct swork_event notify_work;
-
-static int pkg_thermal_notify_work_init(void)
-{
-	int err;
-
-	err = swork_get();
-	if (err)
-		return err;
-
-	INIT_SWORK(&notify_work, pkg_thermal_notify_work);
 	return 0;
 }
 
-static void pkg_thermal_notify_work_cleanup(void)
-{
-	swork_put();
-}
-
-static int pkg_thermal_notify(u64 msr_val)
-{
-	swork_queue(&notify_work);
-	return 0;
-}
-
-#else  /* !CONFIG_PREEMPT_RT_FULL */
-
-static int pkg_thermal_notify_work_init(void) { return 0; }
-
-static void pkg_thermal_notify_work_cleanup(void) {  }
-
-static int pkg_thermal_notify(u64 msr_val)
-{
-	pkg_thermal_notify_work(NULL);
-	return 0;
-}
-#endif /* CONFIG_PREEMPT_RT_FULL */
-
 static int pkg_temp_thermal_device_add(unsigned int cpu)
 {
 	int pkgid = topology_logical_package_id(cpu);
@@ -554,16 +515,11 @@ static int __init pkg_temp_thermal_init(void)
 	if (!x86_match_cpu(pkg_temp_thermal_ids))
 		return -ENODEV;
 
-	if (!pkg_thermal_notify_work_init())
-		return -ENODEV;
-
 	max_packages = topology_max_packages();
 	packages = kcalloc(max_packages, sizeof(struct pkg_device *),
 			   GFP_KERNEL);
-	if (!packages) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!packages)
+		return -ENOMEM;
 
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
 				pkg_thermal_cpu_online,	pkg_thermal_cpu_offline);
@@ -581,7 +537,6 @@ static int __init pkg_temp_thermal_init(void)
 	return 0;
 
 err:
-	pkg_thermal_notify_work_cleanup();
 	kfree(packages);
 	return ret;
 }
@@ -595,7 +550,6 @@ static void __exit pkg_temp_thermal_exit(void)
 	cpuhp_remove_state(pkg_thermal_hp_state);
 	debugfs_remove_recursive(debugfs);
 	kfree(packages);
-	pkg_thermal_notify_work_cleanup();
 }
 module_exit(pkg_temp_thermal_exit)
 
-- 
2.20.1




* [RFC][PATCH RT 4/7] thermal: Defer thermal wakeups to threads
  2019-09-20 21:53 [RFC][PATCH RT 0/7] Revert of simple work, and backport workqueue rework Steven Rostedt
                   ` (2 preceding siblings ...)
  2019-09-20 21:53 ` [RFC][PATCH RT 3/7] revert-thermal Steven Rostedt
@ 2019-09-20 21:53 ` Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 5/7] revert-block Steven Rostedt
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Steven Rostedt @ 2019-09-20 21:53 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Thomas Gleixner, Carsten Emde, Sebastian Andrzej Siewior,
	John Kacur, Paul Gortmaker, Julia Cartwright, Daniel Wagner,
	tom.zanussi, Daniel Wagner

From: Daniel Wagner <wagi@monom.org>

[ Upstream commit ad2408dc248fe58536eef5b2b5734d8f9d3a280b ]

On RT, the spin lock in pkg_temp_thermal_platform_thermal_notify() will
call schedule() while we run in irq context.

[<ffffffff816850ac>] dump_stack+0x4e/0x8f
[<ffffffff81680f7d>] __schedule_bug+0xa6/0xb4
[<ffffffff816896b4>] __schedule+0x5b4/0x700
[<ffffffff8168982a>] schedule+0x2a/0x90
[<ffffffff8168a8b5>] rt_spin_lock_slowlock+0xe5/0x2d0
[<ffffffff8168afd5>] rt_spin_lock+0x25/0x30
[<ffffffffa03a7b75>] pkg_temp_thermal_platform_thermal_notify+0x45/0x134 [x86_pkg_temp_thermal]
[<ffffffff8103d4db>] ? therm_throt_process+0x1b/0x160
[<ffffffff8103d831>] intel_thermal_interrupt+0x211/0x250
[<ffffffff8103d8c1>] smp_thermal_interrupt+0x21/0x40
[<ffffffff8169415d>] thermal_interrupt+0x6d/0x80

Let's defer the work to a kthread.
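
Not part of the patch, just a sketch of the pattern with hypothetical
my_* names: a statically defined kthread work item queued from the
interrupt path and flushed on teardown (the flush matches the TODO note
below; the patch itself uses the series' kthread_schedule_work() helper).

#include <linux/kthread.h>

static void my_notify_work_fn(struct kthread_work *work)
{
	/* read the thermal status and update the zones; may sleep on RT */
}

static DEFINE_KTHREAD_WORK(my_notify_work, my_notify_work_fn);
static struct kthread_worker *my_worker;	/* kthread_create_worker() at init */

static int my_thermal_notify(u64 msr_val)
{
	/* called from the thermal interrupt: just queue and return */
	kthread_queue_work(my_worker, &my_notify_work);
	return 0;
}

static void my_thermal_exit(void)
{
	/* make sure a queued or running item finishes before freeing state */
	kthread_flush_work(&my_notify_work);
}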

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
[bigeasy: reorder init/deinit position. TODO: flush swork on exit]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/thermal/x86_pkg_temp_thermal.c | 28 +++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 1ef937d799e4..82f21fd4afb0 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -29,6 +29,7 @@
 #include <linux/pm.h>
 #include <linux/thermal.h>
 #include <linux/debugfs.h>
+#include <linux/kthread.h>
 #include <asm/cpu_device_id.h>
 #include <asm/mce.h>
 
@@ -329,7 +330,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
 	schedule_delayed_work_on(cpu, work, ms);
 }
 
-static int pkg_thermal_notify(u64 msr_val)
+static void pkg_thermal_notify_work(struct kthread_work *work)
 {
 	int cpu = smp_processor_id();
 	struct pkg_device *pkgdev;
@@ -348,8 +349,32 @@ static int pkg_thermal_notify(u64 msr_val)
 	}
 
 	spin_unlock_irqrestore(&pkg_temp_lock, flags);
+}
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+static DEFINE_KTHREAD_WORK(notify_work, pkg_thermal_notify_work);
+
+static int pkg_thermal_notify(u64 msr_val)
+{
+	kthread_schedule_work(&notify_work);
+	return 0;
+}
+
+static void pkg_thermal_notify_flush(void)
+{
+	kthread_flush_work(&notify_work);
+}
+
+#else  /* !CONFIG_PREEMPT_RT_FULL */
+
+static void pkg_thermal_notify_flush(void) { }
+
+static int pkg_thermal_notify(u64 msr_val)
+{
+	pkg_thermal_notify_work(NULL);
 	return 0;
 }
+#endif /* CONFIG_PREEMPT_RT_FULL */
 
 static int pkg_temp_thermal_device_add(unsigned int cpu)
 {
@@ -548,6 +573,7 @@ static void __exit pkg_temp_thermal_exit(void)
 	platform_thermal_package_rate_control = NULL;
 
 	cpuhp_remove_state(pkg_thermal_hp_state);
+	pkg_thermal_notify_flush();
 	debugfs_remove_recursive(debugfs);
 	kfree(packages);
 }
-- 
2.20.1




* [RFC][PATCH RT 5/7] revert-block
  2019-09-20 21:53 [RFC][PATCH RT 0/7] Revert of simple work, and backport workqueue rework Steven Rostedt
                   ` (3 preceding siblings ...)
  2019-09-20 21:53 ` [RFC][PATCH RT 4/7] thermal: Defer thermal wakeups to threads Steven Rostedt
@ 2019-09-20 21:53 ` Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 6/7] block: blk-mq: move blk_queue_usage_counter_release() into process context Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 7/7] workqueue: rework Steven Rostedt
  6 siblings, 0 replies; 8+ messages in thread
From: Steven Rostedt @ 2019-09-20 21:53 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Thomas Gleixner, Carsten Emde, Sebastian Andrzej Siewior,
	John Kacur, Paul Gortmaker, Julia Cartwright, Daniel Wagner,
	tom.zanussi

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Revert the swork version of: block: blk-mq: move blk_queue_usage_counter_release() into process context

In order to switch to upstream, we need to revert the swork code.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 block/blk-core.c       | 14 +-------------
 include/linux/blkdev.h |  2 --
 2 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f7e16b4466f0..3f08d6fd0787 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -969,21 +969,12 @@ void blk_queue_exit(struct request_queue *q)
 	percpu_ref_put(&q->q_usage_counter);
 }
 
-static void blk_queue_usage_counter_release_swork(struct swork_event *sev)
-{
-	struct request_queue *q =
-		container_of(sev, struct request_queue, mq_pcpu_wake);
-
-	wake_up_all(&q->mq_freeze_wq);
-}
-
 static void blk_queue_usage_counter_release(struct percpu_ref *ref)
 {
 	struct request_queue *q =
 		container_of(ref, struct request_queue, q_usage_counter);
 
-	if (wq_has_sleeper(&q->mq_freeze_wq))
-		swork_queue(&q->mq_pcpu_wake);
+	wake_up_all(&q->mq_freeze_wq);
 }
 
 static void blk_rq_timed_out_timer(struct timer_list *t)
@@ -1080,7 +1071,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 	queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
 
 	init_waitqueue_head(&q->mq_freeze_wq);
-	INIT_SWORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_swork);
 
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
@@ -3970,8 +3960,6 @@ int __init blk_dev_init(void)
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
 
-	BUG_ON(swork_get());
-
 	request_cachep = kmem_cache_create("blkdev_requests",
 			sizeof(struct request), 0, SLAB_PANIC, NULL);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7b7c0bc6a514..f1960add94df 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,7 +27,6 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
-#include <linux/swork.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -656,7 +655,6 @@ struct request_queue {
 #endif
 	struct rcu_head		rcu_head;
 	wait_queue_head_t	mq_freeze_wq;
-	struct swork_event	mq_pcpu_wake;
 	struct percpu_ref	q_usage_counter;
 	struct list_head	all_q_node;
 
-- 
2.20.1




* [RFC][PATCH RT 6/7] block: blk-mq: move blk_queue_usage_counter_release() into process context
  2019-09-20 21:53 [RFC][PATCH RT 0/7] Revert of simple work, and backport workqueue rework Steven Rostedt
                   ` (4 preceding siblings ...)
  2019-09-20 21:53 ` [RFC][PATCH RT 5/7] revert-block Steven Rostedt
@ 2019-09-20 21:53 ` Steven Rostedt
  2019-09-20 21:53 ` [RFC][PATCH RT 7/7] workqueue: rework Steven Rostedt
  6 siblings, 0 replies; 8+ messages in thread
From: Steven Rostedt @ 2019-09-20 21:53 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Thomas Gleixner, Carsten Emde, Sebastian Andrzej Siewior,
	John Kacur, Paul Gortmaker, Julia Cartwright, Daniel Wagner,
	tom.zanussi

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

[ Upstream commit 61c928ecf4fe200bda9b49a0813b5ba0f43995b5 ]

| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914
| in_atomic(): 1, irqs_disabled(): 0, pid: 255, name: kworker/u257:6
| 5 locks held by kworker/u257:6/255:
|  #0:  ("events_unbound"){.+.+.+}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
|  #1:  ((&entry->work)){+.+.+.}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
|  #2:  (&shost->scan_mutex){+.+.+.}, at: [<ffffffffa000faa3>] __scsi_add_device+0xa3/0x130 [scsi_mod]
|  #3:  (&set->tag_list_lock){+.+...}, at: [<ffffffff812f09fa>] blk_mq_init_queue+0x96a/0xa50
|  #4:  (rcu_read_lock_sched){......}, at: [<ffffffff8132887d>] percpu_ref_kill_and_confirm+0x1d/0x120
| Preemption disabled at:[<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
|
| CPU: 2 PID: 255 Comm: kworker/u257:6 Not tainted 3.18.7-rt0+ #1
| Workqueue: events_unbound async_run_entry_fn
|  0000000000000003 ffff8800bc29f998 ffffffff815b3a12 0000000000000000
|  0000000000000000 ffff8800bc29f9b8 ffffffff8109aa16 ffff8800bc29fa28
|  ffff8800bc5d1bc8 ffff8800bc29f9e8 ffffffff815b8dd4 ffff880000000000
| Call Trace:
|  [<ffffffff815b3a12>] dump_stack+0x4f/0x7c
|  [<ffffffff8109aa16>] __might_sleep+0x116/0x190
|  [<ffffffff815b8dd4>] rt_spin_lock+0x24/0x60
|  [<ffffffff810b6089>] __wake_up+0x29/0x60
|  [<ffffffff812ee06e>] blk_mq_usage_counter_release+0x1e/0x20
|  [<ffffffff81328966>] percpu_ref_kill_and_confirm+0x106/0x120
|  [<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
|  [<ffffffff812f0000>] blk_mq_update_tag_set_depth+0x40/0xd0
|  [<ffffffff812f0a1c>] blk_mq_init_queue+0x98c/0xa50
|  [<ffffffffa000dcf0>] scsi_mq_alloc_queue+0x20/0x60 [scsi_mod]
|  [<ffffffffa000ea35>] scsi_alloc_sdev+0x2f5/0x370 [scsi_mod]
|  [<ffffffffa000f494>] scsi_probe_and_add_lun+0x9e4/0xdd0 [scsi_mod]
|  [<ffffffffa000fb26>] __scsi_add_device+0x126/0x130 [scsi_mod]
|  [<ffffffffa013033f>] ata_scsi_scan_host+0xaf/0x200 [libata]
|  [<ffffffffa012b5b6>] async_port_probe+0x46/0x60 [libata]
|  [<ffffffff810978fb>] async_run_entry_fn+0x3b/0xf0
|  [<ffffffff8108ee81>] process_one_work+0x201/0x5e0

percpu_ref_kill_and_confirm() invokes blk_mq_usage_counter_release() in
an rcu-sched region. A swait-based wait queue can't be used due to the
wake_up_all() usage and disabled interrupts in !RT configs (as reported
by Corey Minyard).
The wq_has_sleeper() check was suggested by Peter Zijlstra.
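
As an illustration (hypothetical names, not the patch itself): the same
conditional deferred-wakeup shape with a plain work item, which is what
this code becomes after the workqueue rework later in the series; the
patch below uses a kthread work item for now.

#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>

static DECLARE_WAIT_QUEUE_HEAD(my_freeze_wq);

static void my_release_work_fn(struct work_struct *work)
{
	/* process context: taking the wait-queue lock / sleeping is fine */
	wake_up_all(&my_freeze_wq);
}
static DECLARE_WORK(my_release_work, my_release_work_fn);

static void my_usage_counter_release(struct percpu_ref *ref)
{
	/*
	 * Called in an rcu-sched region with preemption disabled. Only
	 * defer the wakeup when somebody is actually waiting.
	 */
	if (wq_has_sleeper(&my_freeze_wq))
		schedule_work(&my_release_work);
}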

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 block/blk-core.c       | 12 +++++++++++-
 include/linux/blkdev.h |  2 ++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 3f08d6fd0787..8d4c5d69c5c4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -969,12 +969,21 @@ void blk_queue_exit(struct request_queue *q)
 	percpu_ref_put(&q->q_usage_counter);
 }
 
+static void blk_queue_usage_counter_release_wrk(struct kthread_work *work)
+{
+	struct request_queue *q =
+		container_of(work, struct request_queue, mq_pcpu_wake);
+
+	wake_up_all(&q->mq_freeze_wq);
+}
+
 static void blk_queue_usage_counter_release(struct percpu_ref *ref)
 {
 	struct request_queue *q =
 		container_of(ref, struct request_queue, q_usage_counter);
 
-	wake_up_all(&q->mq_freeze_wq);
+	if (wq_has_sleeper(&q->mq_freeze_wq))
+		kthread_schedule_work(&q->mq_pcpu_wake);
 }
 
 static void blk_rq_timed_out_timer(struct timer_list *t)
@@ -1071,6 +1080,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 	queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
 
 	init_waitqueue_head(&q->mq_freeze_wq);
+	kthread_init_work(&q->mq_pcpu_wake, blk_queue_usage_counter_release_wrk);
 
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f1960add94df..15a489abfb62 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -13,6 +13,7 @@
 #include <linux/llist.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/pagemap.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/wait.h>
@@ -655,6 +656,7 @@ struct request_queue {
 #endif
 	struct rcu_head		rcu_head;
 	wait_queue_head_t	mq_freeze_wq;
+	struct kthread_work	mq_pcpu_wake;
 	struct percpu_ref	q_usage_counter;
 	struct list_head	all_q_node;
 
-- 
2.20.1




* [RFC][PATCH RT 7/7] workqueue: rework
  2019-09-20 21:53 [RFC][PATCH RT 0/7] Revert of simple work, and backport workqueue rework Steven Rostedt
                   ` (5 preceding siblings ...)
  2019-09-20 21:53 ` [RFC][PATCH RT 6/7] block: blk-mq: move blk_queue_usage_counter_release() into process context Steven Rostedt
@ 2019-09-20 21:53 ` Steven Rostedt
  6 siblings, 0 replies; 8+ messages in thread
From: Steven Rostedt @ 2019-09-20 21:53 UTC (permalink / raw)
  To: linux-kernel, linux-rt-users
  Cc: Thomas Gleixner, Carsten Emde, Sebastian Andrzej Siewior,
	John Kacur, Paul Gortmaker, Julia Cartwright, Daniel Wagner,
	tom.zanussi

From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

[ Upstream commit d15a862f24df983458533aebd6fa207ecdd1095a ]

This is an all-in-one change for the workqueue rework.
The worker_pool.lock is converted to a raw_spinlock_t. With this change
we can schedule work items from preempt-disabled sections and sections
with disabled interrupts. This allows us to remove all of the kthread_.*
workarounds we used to have.
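
A hedged example of what this enables (hypothetical driver code, not
from the patch): with the pool lock raw, a regular work item can be
queued directly from a hard interrupt handler on RT, where the RT tree
previously needed the kthread_.*/swork detours for exactly this case.

#include <linux/interrupt.h>
#include <linux/workqueue.h>

static void my_deferred_fn(struct work_struct *work)
{
	/* runs in a kworker; sleeping locks are allowed here */
}
static DECLARE_WORK(my_deferred_work, my_deferred_fn);

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	/* interrupts disabled, no sleeping: queue the work and return */
	schedule_work(&my_deferred_work);
	return IRQ_HANDLED;
}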

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 block/blk-core.c                       |   6 +-
 drivers/block/loop.c                   |   2 +-
 drivers/spi/spi-rockchip.c             |   1 -
 drivers/thermal/x86_pkg_temp_thermal.c |  28 +--
 fs/aio.c                               |  10 +-
 include/linux/blk-cgroup.h             |   2 +-
 include/linux/blkdev.h                 |   2 +-
 include/linux/interrupt.h              |   5 -
 include/linux/kthread-cgroup.h         |  17 --
 include/linux/kthread.h                |  15 +-
 include/linux/swait.h                  |  14 ++
 include/linux/workqueue.h              |   4 -
 init/main.c                            |   1 -
 kernel/irq/manage.c                    |  36 +--
 kernel/kthread.c                       |  14 --
 kernel/sched/core.c                    |   1 +
 kernel/time/hrtimer.c                  |  24 --
 kernel/workqueue.c                     | 300 +++++++++++--------------
 18 files changed, 164 insertions(+), 318 deletions(-)
 delete mode 100644 include/linux/kthread-cgroup.h

diff --git a/block/blk-core.c b/block/blk-core.c
index 8d4c5d69c5c4..f0cb86ef8215 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -969,7 +969,7 @@ void blk_queue_exit(struct request_queue *q)
 	percpu_ref_put(&q->q_usage_counter);
 }
 
-static void blk_queue_usage_counter_release_wrk(struct kthread_work *work)
+static void blk_queue_usage_counter_release_wrk(struct work_struct *work)
 {
 	struct request_queue *q =
 		container_of(work, struct request_queue, mq_pcpu_wake);
@@ -983,7 +983,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
 		container_of(ref, struct request_queue, q_usage_counter);
 
 	if (wq_has_sleeper(&q->mq_freeze_wq))
-		kthread_schedule_work(&q->mq_pcpu_wake);
+		schedule_work(&q->mq_pcpu_wake);
 }
 
 static void blk_rq_timed_out_timer(struct timer_list *t)
@@ -1080,7 +1080,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 	queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
 
 	init_waitqueue_head(&q->mq_freeze_wq);
-	kthread_init_work(&q->mq_pcpu_wake, blk_queue_usage_counter_release_wrk);
+	INIT_WORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_wrk);
 
 	/*
 	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 298c96b6befa..cef8e00c9d9d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -70,7 +70,7 @@
 #include <linux/writeback.h>
 #include <linux/completion.h>
 #include <linux/highmem.h>
-#include <linux/kthread-cgroup.h>
+#include <linux/kthread.h>
 #include <linux/splice.h>
 #include <linux/sysfs.h>
 #include <linux/miscdevice.h>
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index b56619418cea..fdcf3076681b 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -22,7 +22,6 @@
 #include <linux/spi/spi.h>
 #include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
-#include <linux/interrupt.h>
 
 #define DRIVER_NAME "rockchip-spi"
 
diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 82f21fd4afb0..1ef937d799e4 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -29,7 +29,6 @@
 #include <linux/pm.h>
 #include <linux/thermal.h>
 #include <linux/debugfs.h>
-#include <linux/kthread.h>
 #include <asm/cpu_device_id.h>
 #include <asm/mce.h>
 
@@ -330,7 +329,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
 	schedule_delayed_work_on(cpu, work, ms);
 }
 
-static void pkg_thermal_notify_work(struct kthread_work *work)
+static int pkg_thermal_notify(u64 msr_val)
 {
 	int cpu = smp_processor_id();
 	struct pkg_device *pkgdev;
@@ -349,32 +348,8 @@ static void pkg_thermal_notify_work(struct kthread_work *work)
 	}
 
 	spin_unlock_irqrestore(&pkg_temp_lock, flags);
-}
-
-#ifdef CONFIG_PREEMPT_RT_FULL
-static DEFINE_KTHREAD_WORK(notify_work, pkg_thermal_notify_work);
-
-static int pkg_thermal_notify(u64 msr_val)
-{
-	kthread_schedule_work(&notify_work);
-	return 0;
-}
-
-static void pkg_thermal_notify_flush(void)
-{
-	kthread_flush_work(&notify_work);
-}
-
-#else  /* !CONFIG_PREEMPT_RT_FULL */
-
-static void pkg_thermal_notify_flush(void) { }
-
-static int pkg_thermal_notify(u64 msr_val)
-{
-	pkg_thermal_notify_work(NULL);
 	return 0;
 }
-#endif /* CONFIG_PREEMPT_RT_FULL */
 
 static int pkg_temp_thermal_device_add(unsigned int cpu)
 {
@@ -573,7 +548,6 @@ static void __exit pkg_temp_thermal_exit(void)
 	platform_thermal_package_rate_control = NULL;
 
 	cpuhp_remove_state(pkg_thermal_hp_state);
-	pkg_thermal_notify_flush();
 	debugfs_remove_recursive(debugfs);
 	kfree(packages);
 }
diff --git a/fs/aio.c b/fs/aio.c
index 0c613d805bf1..c74dd321f5b7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -121,7 +121,7 @@ struct kioctx {
 	long			nr_pages;
 
 	struct rcu_work		free_rwork;	/* see free_ioctx() */
-	struct kthread_work	free_kwork;	/* see free_ioctx() */
+	struct work_struct	free_work;	/* see free_ioctx() */
 
 	/*
 	 * signals when all in-flight requests are done
@@ -607,9 +607,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  * now it's safe to cancel any that need to be.
  */
-static void free_ioctx_users_work(struct kthread_work *work)
+static void free_ioctx_users_work(struct work_struct *work)
 {
-	struct kioctx *ctx = container_of(work, struct kioctx, free_kwork);
+	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
 	struct aio_kiocb *req;
 
 	spin_lock_irq(&ctx->ctx_lock);
@@ -631,8 +631,8 @@ static void free_ioctx_users(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
 
-	kthread_init_work(&ctx->free_kwork, free_ioctx_users_work);
-	kthread_schedule_work(&ctx->free_kwork);
+	INIT_WORK(&ctx->free_work, free_ioctx_users_work);
+	schedule_work(&ctx->free_work);
 }
 
 static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 0473efda4c65..da587e60fe86 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -14,7 +14,7 @@
  * 	              Nauman Rafique <nauman@google.com>
  */
 
-#include <linux/kthread-cgroup.h>
+#include <linux/kthread.h>
 #include <linux/percpu_counter.h>
 #include <linux/seq_file.h>
 #include <linux/radix-tree.h>
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 15a489abfb62..45653e23e4cd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -656,7 +656,7 @@ struct request_queue {
 #endif
 	struct rcu_head		rcu_head;
 	wait_queue_head_t	mq_freeze_wq;
-	struct kthread_work	mq_pcpu_wake;
+	struct work_struct	mq_pcpu_wake;
 	struct percpu_ref	q_usage_counter;
 	struct list_head	all_q_node;
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a9321f6429f2..97d9ba26915e 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -13,7 +13,6 @@
 #include <linux/hrtimer.h>
 #include <linux/kref.h>
 #include <linux/workqueue.h>
-#include <linux/kthread.h>
 
 #include <linux/atomic.h>
 #include <asm/ptrace.h>
@@ -239,11 +238,7 @@ extern void resume_device_irqs(void);
 struct irq_affinity_notify {
 	unsigned int irq;
 	struct kref kref;
-#ifdef CONFIG_PREEMPT_RT_BASE
-	struct kthread_work work;
-#else
 	struct work_struct work;
-#endif
 	void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
 	void (*release)(struct kref *ref);
 };
diff --git a/include/linux/kthread-cgroup.h b/include/linux/kthread-cgroup.h
deleted file mode 100644
index 53d34bca9d72..000000000000
--- a/include/linux/kthread-cgroup.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_KTHREAD_CGROUP_H
-#define _LINUX_KTHREAD_CGROUP_H
-#include <linux/kthread.h>
-#include <linux/cgroup.h>
-
-#ifdef CONFIG_BLK_CGROUP
-void kthread_associate_blkcg(struct cgroup_subsys_state *css);
-struct cgroup_subsys_state *kthread_blkcg(void);
-#else
-static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
-static inline struct cgroup_subsys_state *kthread_blkcg(void)
-{
-	return NULL;
-}
-#endif
-#endif
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 7cf56eb54103..6b8c064f0cbc 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -4,6 +4,7 @@
 /* Simple interface for creating and stopping kernel threads without mess. */
 #include <linux/err.h>
 #include <linux/sched.h>
+#include <linux/cgroup.h>
 
 __printf(4, 5)
 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
@@ -197,12 +198,14 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work);
 
 void kthread_destroy_worker(struct kthread_worker *worker);
 
-extern struct kthread_worker kthread_global_worker;
-void kthread_init_global_worker(void);
-
-static inline bool kthread_schedule_work(struct kthread_work *work)
+#ifdef CONFIG_BLK_CGROUP
+void kthread_associate_blkcg(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *kthread_blkcg(void);
+#else
+static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
+static inline struct cgroup_subsys_state *kthread_blkcg(void)
 {
-	return kthread_queue_work(&kthread_global_worker, work);
+	return NULL;
 }
-
+#endif
 #endif /* _LINUX_KTHREAD_H */
diff --git a/include/linux/swait.h b/include/linux/swait.h
index f426a0661aa0..21ae66cd41d3 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -299,4 +299,18 @@ do {									\
 	__ret;								\
 })
 
+#define __swait_event_lock_irq(wq, condition, lock, cmd)		\
+	___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
+		       raw_spin_unlock_irq(&lock);			\
+		       cmd;						\
+		       schedule();					\
+		       raw_spin_lock_irq(&lock))
+
+#define swait_event_lock_irq(wq_head, condition, lock)			\
+	do {								\
+		if (condition)						\
+			break;						\
+		__swait_event_lock_irq(wq_head, condition, lock, );	\
+	} while (0)
+
 #endif /* _LINUX_SWAIT_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 60d673e15632..546aa73fba6a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -455,10 +455,6 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
-void free_workqueue_attrs(struct workqueue_attrs *attrs);
-int apply_workqueue_attrs(struct workqueue_struct *wq,
-			  const struct workqueue_attrs *attrs);
 int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
 
 extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
diff --git a/init/main.c b/init/main.c
index b0e95351c22c..4a7471606e53 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1130,7 +1130,6 @@ static noinline void __init kernel_init_freeable(void)
 	smp_prepare_cpus(setup_max_cpus);
 
 	workqueue_init();
-	kthread_init_global_worker();
 
 	init_mm_internals();
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 290cd520dba1..82b3728685ca 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -285,12 +285,7 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
 
 	if (desc->affinity_notify) {
 		kref_get(&desc->affinity_notify->kref);
-
-#ifdef CONFIG_PREEMPT_RT_BASE
-		kthread_schedule_work(&desc->affinity_notify->work);
-#else
 		schedule_work(&desc->affinity_notify->work);
-#endif
 	}
 	irqd_set(data, IRQD_AFFINITY_SET);
 
@@ -328,8 +323,10 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
 }
 EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
 
-static void _irq_affinity_notify(struct irq_affinity_notify *notify)
+static void irq_affinity_notify(struct work_struct *work)
 {
+	struct irq_affinity_notify *notify =
+		container_of(work, struct irq_affinity_notify, work);
 	struct irq_desc *desc = irq_to_desc(notify->irq);
 	cpumask_var_t cpumask;
 	unsigned long flags;
@@ -351,25 +348,6 @@ static void _irq_affinity_notify(struct irq_affinity_notify *notify)
 	kref_put(&notify->kref, notify->release);
 }
 
-#ifdef CONFIG_PREEMPT_RT_BASE
-
-static void irq_affinity_notify(struct kthread_work *work)
-{
-	struct irq_affinity_notify *notify =
-		container_of(work, struct irq_affinity_notify, work);
-	_irq_affinity_notify(notify);
-}
-
-#else
-
-static void irq_affinity_notify(struct work_struct *work)
-{
-	struct irq_affinity_notify *notify =
-		container_of(work, struct irq_affinity_notify, work);
-	_irq_affinity_notify(notify);
-}
-#endif
-
 /**
  *	irq_set_affinity_notifier - control notification of IRQ affinity changes
  *	@irq:		Interrupt for which to enable/disable notification
@@ -398,11 +376,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
 	if (notify) {
 		notify->irq = irq;
 		kref_init(&notify->kref);
-#ifdef CONFIG_PREEMPT_RT_BASE
-		kthread_init_work(&notify->work, irq_affinity_notify);
-#else
 		INIT_WORK(&notify->work, irq_affinity_notify);
-#endif
 	}
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
@@ -411,11 +385,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	if (old_notify) {
-#ifdef CONFIG_PREEMPT_RT_BASE
-		kthread_cancel_work_sync(&notify->work);
-#else
 		cancel_work_sync(&old_notify->work);
-#endif
 		kref_put(&old_notify->kref, old_notify->release);
 	}
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 9db017761a1f..5641b55783a6 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,7 +20,6 @@
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
-#include <linux/cgroup.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
@@ -1181,19 +1180,6 @@ void kthread_destroy_worker(struct kthread_worker *worker)
 }
 EXPORT_SYMBOL(kthread_destroy_worker);
 
-DEFINE_KTHREAD_WORKER(kthread_global_worker);
-EXPORT_SYMBOL(kthread_global_worker);
-
-__init void kthread_init_global_worker(void)
-{
-	kthread_global_worker.task = kthread_create(kthread_worker_fn,
-						    &kthread_global_worker,
-						    "kswork");
-	if (WARN_ON(IS_ERR(kthread_global_worker.task)))
-		return;
-	wake_up_process(kthread_global_worker.task);
-}
-
 #ifdef CONFIG_BLK_CGROUP
 /**
  * kthread_associate_blkcg - associate blkcg to current kthread
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 34e786c60d79..dd14d3960ab8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3566,6 +3566,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
 {
 	if (!tsk->state)
 		return;
+
 	/*
 	 * If a worker went to sleep, notify and ask workqueue whether
 	 * it wants to wake up a task to maintain concurrency.
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 4534e7871c8c..67951292df58 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -730,29 +730,6 @@ static void hrtimer_switch_to_hres(void)
 	retrigger_next_event(NULL);
 }
 
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-static struct swork_event clock_set_delay_work;
-
-static void run_clock_set_delay(struct swork_event *event)
-{
-	clock_was_set();
-}
-
-void clock_was_set_delayed(void)
-{
-	swork_queue(&clock_set_delay_work);
-}
-
-static __init int create_clock_set_delay_thread(void)
-{
-	WARN_ON(swork_get());
-	INIT_SWORK(&clock_set_delay_work, run_clock_set_delay);
-	return 0;
-}
-early_initcall(create_clock_set_delay_thread);
-#else /* PREEMPT_RT_FULL */
-
 static void clock_was_set_work(struct work_struct *work)
 {
 	clock_was_set();
@@ -768,7 +745,6 @@ void clock_was_set_delayed(void)
 {
 	schedule_work(&hrtimer_work);
 }
-#endif
 
 #else
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 12137825bf5a..f6d12421e7bb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -49,8 +49,6 @@
 #include <linux/uaccess.h>
 #include <linux/sched/isolation.h>
 #include <linux/nmi.h>
-#include <linux/locallock.h>
-#include <linux/delay.h>
 
 #include "workqueue_internal.h"
 
@@ -125,11 +123,6 @@ enum {
  *    cpu or grabbing pool->lock is enough for read access.  If
  *    POOL_DISASSOCIATED is set, it's identical to L.
  *
- *    On RT we need the extra protection via rt_lock_idle_list() for
- *    the list manipulations against read access from
- *    wq_worker_sleeping(). All other places are nicely serialized via
- *    pool->lock.
- *
  * A: wq_pool_attach_mutex protected.
  *
  * PL: wq_pool_mutex protected.
@@ -151,7 +144,7 @@ enum {
 /* struct worker is defined in workqueue_internal.h */
 
 struct worker_pool {
-	spinlock_t		lock;		/* the pool lock */
+	raw_spinlock_t		lock;		/* the pool lock */
 	int			cpu;		/* I: the associated cpu */
 	int			node;		/* I: the associated node ID */
 	int			id;		/* I: pool ID */
@@ -304,8 +297,8 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
-static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
-static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
+static DEFINE_RAW_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
+static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
 
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
@@ -357,8 +350,6 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq);
 struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
 
-static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
-
 static int worker_thread(void *__worker);
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 
@@ -435,31 +426,6 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
 		else
 
-#ifdef CONFIG_PREEMPT_RT_BASE
-static inline void rt_lock_idle_list(struct worker_pool *pool)
-{
-	preempt_disable();
-}
-static inline void rt_unlock_idle_list(struct worker_pool *pool)
-{
-	preempt_enable();
-}
-static inline void sched_lock_idle_list(struct worker_pool *pool) { }
-static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
-#else
-static inline void rt_lock_idle_list(struct worker_pool *pool) { }
-static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
-static inline void sched_lock_idle_list(struct worker_pool *pool)
-{
-	spin_lock_irq(&pool->lock);
-}
-static inline void sched_unlock_idle_list(struct worker_pool *pool)
-{
-	spin_unlock_irq(&pool->lock);
-}
-#endif
-
-
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
 static struct debug_obj_descr work_debug_descr;
@@ -862,20 +828,14 @@ static struct worker *first_idle_worker(struct worker_pool *pool)
  * Wake up the first idle worker of @pool.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  */
 static void wake_up_worker(struct worker_pool *pool)
 {
-	struct worker *worker;
-
-	rt_lock_idle_list(pool);
-
-	worker = first_idle_worker(pool);
+	struct worker *worker = first_idle_worker(pool);
 
 	if (likely(worker))
 		wake_up_process(worker->task);
-
-	rt_unlock_idle_list(pool);
 }
 
 /**
@@ -904,7 +864,7 @@ void wq_worker_running(struct task_struct *task)
  */
 void wq_worker_sleeping(struct task_struct *task)
 {
-	struct worker *worker = kthread_data(task);
+	struct worker *next, *worker = kthread_data(task);
 	struct worker_pool *pool;
 
 	/*
@@ -921,18 +881,26 @@ void wq_worker_sleeping(struct task_struct *task)
 		return;
 
 	worker->sleeping = 1;
+	raw_spin_lock_irq(&pool->lock);
 
 	/*
 	 * The counterpart of the following dec_and_test, implied mb,
 	 * worklist not empty test sequence is in insert_work().
 	 * Please read comment there.
+	 *
+	 * NOT_RUNNING is clear.  This means that we're bound to and
+	 * running on the local cpu w/ rq lock held and preemption
+	 * disabled, which in turn means that none else could be
+	 * manipulating idle_list, so dereferencing idle_list without pool
+	 * lock is safe.
 	 */
 	if (atomic_dec_and_test(&pool->nr_running) &&
 	    !list_empty(&pool->worklist)) {
-		sched_lock_idle_list(pool);
-		wake_up_worker(pool);
-		sched_unlock_idle_list(pool);
+		next = first_idle_worker(pool);
+		if (next)
+			wake_up_process(next->task);
 	}
+	raw_spin_unlock_irq(&pool->lock);
 }
 
 /**
@@ -943,7 +911,7 @@ void wq_worker_sleeping(struct task_struct *task)
  * Set @flags in @worker->flags and adjust nr_running accordingly.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock)
+ * raw_spin_lock_irq(pool->lock)
  */
 static inline void worker_set_flags(struct worker *worker, unsigned int flags)
 {
@@ -968,7 +936,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags)
  * Clear @flags in @worker->flags and adjust nr_running accordingly.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock)
+ * raw_spin_lock_irq(pool->lock)
  */
 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
 {
@@ -1016,7 +984,7 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
  * actually occurs, it should be easy to locate the culprit work function.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  *
  * Return:
  * Pointer to worker which is executing @work if found, %NULL
@@ -1051,7 +1019,7 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool,
  * nested inside outer list_for_each_entry_safe().
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  */
 static void move_linked_works(struct work_struct *work, struct list_head *head,
 			      struct work_struct **nextp)
@@ -1129,11 +1097,9 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
 		 * As both pwqs and pools are RCU protected, the
 		 * following lock operations are safe.
 		 */
-		rcu_read_lock();
-		local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
+		raw_spin_lock_irq(&pwq->pool->lock);
 		put_pwq(pwq);
-		local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
-		rcu_read_unlock();
+		raw_spin_unlock_irq(&pwq->pool->lock);
 	}
 }
 
@@ -1166,7 +1132,7 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
  * decrement nr_in_flight of its pwq and handle workqueue flushing.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  */
 static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
 {
@@ -1237,7 +1203,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 	struct worker_pool *pool;
 	struct pool_workqueue *pwq;
 
-	local_lock_irqsave(pendingb_lock, *flags);
+	local_irq_save(*flags);
 
 	/* try to steal the timer if it exists */
 	if (is_dwork) {
@@ -1265,7 +1231,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 	if (!pool)
 		goto fail;
 
-	spin_lock(&pool->lock);
+	raw_spin_lock(&pool->lock);
 	/*
 	 * work->data is guaranteed to point to pwq only while the work
 	 * item is queued on pwq->wq, and both updating work->data to point
@@ -1294,17 +1260,17 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 		/* work->data points to pwq iff queued, point to pool */
 		set_work_pool_and_keep_pending(work, pool->id);
 
-		spin_unlock(&pool->lock);
+		raw_spin_unlock(&pool->lock);
 		rcu_read_unlock();
 		return 1;
 	}
-	spin_unlock(&pool->lock);
+	raw_spin_unlock(&pool->lock);
 fail:
 	rcu_read_unlock();
-	local_unlock_irqrestore(pendingb_lock, *flags);
+	local_irq_restore(*flags);
 	if (work_is_canceling(work))
 		return -ENOENT;
-	cpu_chill();
+	cpu_relax();
 	return -EAGAIN;
 }
 
@@ -1319,7 +1285,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
  * work_struct flags.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  */
 static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
 			struct list_head *head, unsigned int extra_flags)
@@ -1406,13 +1372,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 	 * queued or lose PENDING.  Grabbing PENDING and queueing should
 	 * happen with IRQ disabled.
 	 */
-#ifndef CONFIG_PREEMPT_RT_FULL
-	/*
-	 * nort: On RT the "interrupts-disabled" rule has been replaced with
-	 * pendingb_lock.
-	 */
 	lockdep_assert_irqs_disabled();
-#endif
 
 	debug_work_activate(work);
 
@@ -1440,7 +1400,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 	if (last_pool && last_pool != pwq->pool) {
 		struct worker *worker;
 
-		spin_lock(&last_pool->lock);
+		raw_spin_lock(&last_pool->lock);
 
 		worker = find_worker_executing_work(last_pool, work);
 
@@ -1448,11 +1408,11 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 			pwq = worker->current_pwq;
 		} else {
 			/* meh... not running there, queue here */
-			spin_unlock(&last_pool->lock);
-			spin_lock(&pwq->pool->lock);
+			raw_spin_unlock(&last_pool->lock);
+			raw_spin_lock(&pwq->pool->lock);
 		}
 	} else {
-		spin_lock(&pwq->pool->lock);
+		raw_spin_lock(&pwq->pool->lock);
 	}
 
 	/*
@@ -1465,7 +1425,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 	 */
 	if (unlikely(!pwq->refcnt)) {
 		if (wq->flags & WQ_UNBOUND) {
-			spin_unlock(&pwq->pool->lock);
+			raw_spin_unlock(&pwq->pool->lock);
 			cpu_relax();
 			goto retry;
 		}
@@ -1497,7 +1457,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 	insert_work(pwq, work, worklist, work_flags);
 
 out:
-	spin_unlock(&pwq->pool->lock);
+	raw_spin_unlock(&pwq->pool->lock);
 	rcu_read_unlock();
 }
 
@@ -1518,14 +1478,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
 	bool ret = false;
 	unsigned long flags;
 
-	local_lock_irqsave(pendingb_lock,flags);
+	local_irq_save(flags);
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		__queue_work(cpu, wq, work);
 		ret = true;
 	}
 
-	local_unlock_irqrestore(pendingb_lock, flags);
+	local_irq_restore(flags);
 	return ret;
 }
 EXPORT_SYMBOL(queue_work_on);
@@ -1533,12 +1493,11 @@ EXPORT_SYMBOL(queue_work_on);
 void delayed_work_timer_fn(struct timer_list *t)
 {
 	struct delayed_work *dwork = from_timer(dwork, t, timer);
+	unsigned long flags;
 
-	/* XXX */
-	/* local_lock(pendingb_lock); */
-	/* should have been called from irqsafe timer with irq already off */
+	local_irq_save(flags);
 	__queue_work(dwork->cpu, dwork->wq, &dwork->work);
-	/* local_unlock(pendingb_lock); */
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(delayed_work_timer_fn);
 
@@ -1593,14 +1552,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	unsigned long flags;
 
 	/* read the comment in __queue_work() */
-	local_lock_irqsave(pendingb_lock, flags);
+	local_irq_save(flags);
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
 		__queue_delayed_work(cpu, wq, dwork, delay);
 		ret = true;
 	}
 
-	local_unlock_irqrestore(pendingb_lock, flags);
+	local_irq_restore(flags);
 	return ret;
 }
 EXPORT_SYMBOL(queue_delayed_work_on);
@@ -1635,7 +1594,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
 
 	if (likely(ret >= 0)) {
 		__queue_delayed_work(cpu, wq, dwork, delay);
-		local_unlock_irqrestore(pendingb_lock, flags);
+		local_irq_restore(flags);
 	}
 
 	/* -ENOENT from try_to_grab_pending() becomes %true */
@@ -1646,12 +1605,11 @@ EXPORT_SYMBOL_GPL(mod_delayed_work_on);
 static void rcu_work_rcufn(struct rcu_head *rcu)
 {
 	struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
-	unsigned long flags;
 
 	/* read the comment in __queue_work() */
-	local_lock_irqsave(pendingb_lock, flags);
+	local_irq_disable();
 	__queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
-	local_unlock_irqrestore(pendingb_lock, flags);
+	local_irq_enable();
 }
 
 /**
@@ -1686,7 +1644,7 @@ EXPORT_SYMBOL(queue_rcu_work);
  * necessary.
  *
  * LOCKING:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  */
 static void worker_enter_idle(struct worker *worker)
 {
@@ -1703,9 +1661,7 @@ static void worker_enter_idle(struct worker *worker)
 	worker->last_active = jiffies;
 
 	/* idle_list is LIFO */
-	rt_lock_idle_list(pool);
 	list_add(&worker->entry, &pool->idle_list);
-	rt_unlock_idle_list(pool);
 
 	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
 		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
@@ -1728,7 +1684,7 @@ static void worker_enter_idle(struct worker *worker)
  * @worker is leaving idle state.  Update stats.
  *
  * LOCKING:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  */
 static void worker_leave_idle(struct worker *worker)
 {
@@ -1738,9 +1694,7 @@ static void worker_leave_idle(struct worker *worker)
 		return;
 	worker_clr_flags(worker, WORKER_IDLE);
 	pool->nr_idle--;
-	rt_lock_idle_list(pool);
 	list_del_init(&worker->entry);
-	rt_unlock_idle_list(pool);
 }
 
 static struct worker *alloc_worker(int node)
@@ -1868,11 +1822,11 @@ static struct worker *create_worker(struct worker_pool *pool)
 	worker_attach_to_pool(worker, pool);
 
 	/* start the newly created worker */
-	spin_lock_irq(&pool->lock);
+	raw_spin_lock_irq(&pool->lock);
 	worker->pool->nr_workers++;
 	worker_enter_idle(worker);
 	wake_up_process(worker->task);
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 
 	return worker;
 
@@ -1891,7 +1845,7 @@ static struct worker *create_worker(struct worker_pool *pool)
  * be idle.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  */
 static void destroy_worker(struct worker *worker)
 {
@@ -1908,9 +1862,7 @@ static void destroy_worker(struct worker *worker)
 	pool->nr_workers--;
 	pool->nr_idle--;
 
-	rt_lock_idle_list(pool);
 	list_del_init(&worker->entry);
-	rt_unlock_idle_list(pool);
 	worker->flags |= WORKER_DIE;
 	wake_up_process(worker->task);
 }
@@ -1919,7 +1871,7 @@ static void idle_worker_timeout(struct timer_list *t)
 {
 	struct worker_pool *pool = from_timer(pool, t, idle_timer);
 
-	spin_lock_irq(&pool->lock);
+	raw_spin_lock_irq(&pool->lock);
 
 	while (too_many_workers(pool)) {
 		struct worker *worker;
@@ -1937,7 +1889,7 @@ static void idle_worker_timeout(struct timer_list *t)
 		destroy_worker(worker);
 	}
 
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 }
 
 static void send_mayday(struct work_struct *work)
@@ -1968,8 +1920,8 @@ static void pool_mayday_timeout(struct timer_list *t)
 	struct worker_pool *pool = from_timer(pool, t, mayday_timer);
 	struct work_struct *work;
 
-	spin_lock_irq(&pool->lock);
-	spin_lock(&wq_mayday_lock);		/* for wq->maydays */
+	raw_spin_lock_irq(&pool->lock);
+	raw_spin_lock(&wq_mayday_lock);		/* for wq->maydays */
 
 	if (need_to_create_worker(pool)) {
 		/*
@@ -1982,8 +1934,8 @@ static void pool_mayday_timeout(struct timer_list *t)
 			send_mayday(work);
 	}
 
-	spin_unlock(&wq_mayday_lock);
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock(&wq_mayday_lock);
+	raw_spin_unlock_irq(&pool->lock);
 
 	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
 }
@@ -2002,7 +1954,7 @@ static void pool_mayday_timeout(struct timer_list *t)
  * may_start_working() %true.
  *
  * LOCKING:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
  * multiple times.  Does GFP_KERNEL allocations.  Called only from
  * manager.
  */
@@ -2011,7 +1963,7 @@ __releases(&pool->lock)
 __acquires(&pool->lock)
 {
 restart:
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 
 	/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
 	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
@@ -2027,7 +1979,7 @@ __acquires(&pool->lock)
 	}
 
 	del_timer_sync(&pool->mayday_timer);
-	spin_lock_irq(&pool->lock);
+	raw_spin_lock_irq(&pool->lock);
 	/*
 	 * This is necessary even after a new worker was just successfully
 	 * created as @pool->lock was dropped and the new worker might have
@@ -2050,7 +2002,7 @@ __acquires(&pool->lock)
  * and may_start_working() is true.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
  * multiple times.  Does GFP_KERNEL allocations.
  *
  * Return:
@@ -2073,7 +2025,7 @@ static bool manage_workers(struct worker *worker)
 
 	pool->manager = NULL;
 	pool->flags &= ~POOL_MANAGER_ACTIVE;
-	wake_up(&wq_manager_wait);
+	swake_up_one(&wq_manager_wait);
 	return true;
 }
 
@@ -2089,7 +2041,7 @@ static bool manage_workers(struct worker *worker)
  * call this function to process a work.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock) which is released and regrabbed.
+ * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
  */
 static void process_one_work(struct worker *worker, struct work_struct *work)
 __releases(&pool->lock)
@@ -2171,7 +2123,7 @@ __acquires(&pool->lock)
 	 */
 	set_work_pool_and_clear_pending(work, pool->id);
 
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 
 	lock_map_acquire(&pwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
@@ -2226,7 +2178,7 @@ __acquires(&pool->lock)
 	 */
 	cond_resched();
 
-	spin_lock_irq(&pool->lock);
+	raw_spin_lock_irq(&pool->lock);
 
 	/* clear cpu intensive status */
 	if (unlikely(cpu_intensive))
@@ -2249,7 +2201,7 @@ __acquires(&pool->lock)
  * fetches a work from the top and executes it.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
  * multiple times.
  */
 static void process_scheduled_works(struct worker *worker)
@@ -2291,11 +2243,11 @@ static int worker_thread(void *__worker)
 	/* tell the scheduler that this is a workqueue worker */
 	set_pf_worker(true);
 woke_up:
-	spin_lock_irq(&pool->lock);
+	raw_spin_lock_irq(&pool->lock);
 
 	/* am I supposed to die? */
 	if (unlikely(worker->flags & WORKER_DIE)) {
-		spin_unlock_irq(&pool->lock);
+		raw_spin_unlock_irq(&pool->lock);
 		WARN_ON_ONCE(!list_empty(&worker->entry));
 		set_pf_worker(false);
 
@@ -2361,7 +2313,7 @@ static int worker_thread(void *__worker)
 	 */
 	worker_enter_idle(worker);
 	__set_current_state(TASK_IDLE);
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 	schedule();
 	goto woke_up;
 }
@@ -2415,7 +2367,7 @@ static int rescuer_thread(void *__rescuer)
 	should_stop = kthread_should_stop();
 
 	/* see whether any pwq is asking for help */
-	spin_lock_irq(&wq_mayday_lock);
+	raw_spin_lock_irq(&wq_mayday_lock);
 
 	while (!list_empty(&wq->maydays)) {
 		struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
@@ -2427,11 +2379,11 @@ static int rescuer_thread(void *__rescuer)
 		__set_current_state(TASK_RUNNING);
 		list_del_init(&pwq->mayday_node);
 
-		spin_unlock_irq(&wq_mayday_lock);
+		raw_spin_unlock_irq(&wq_mayday_lock);
 
 		worker_attach_to_pool(rescuer, pool);
 
-		spin_lock_irq(&pool->lock);
+		raw_spin_lock_irq(&pool->lock);
 
 		/*
 		 * Slurp in all works issued via this workqueue and
@@ -2460,10 +2412,10 @@ static int rescuer_thread(void *__rescuer)
 			 * incur MAYDAY_INTERVAL delay inbetween.
 			 */
 			if (need_to_create_worker(pool)) {
-				spin_lock(&wq_mayday_lock);
+				raw_spin_lock(&wq_mayday_lock);
 				get_pwq(pwq);
 				list_move_tail(&pwq->mayday_node, &wq->maydays);
-				spin_unlock(&wq_mayday_lock);
+				raw_spin_unlock(&wq_mayday_lock);
 			}
 		}
 
@@ -2481,14 +2433,14 @@ static int rescuer_thread(void *__rescuer)
 		if (need_more_worker(pool))
 			wake_up_worker(pool);
 
-		spin_unlock_irq(&pool->lock);
+		raw_spin_unlock_irq(&pool->lock);
 
 		worker_detach_from_pool(rescuer);
 
-		spin_lock_irq(&wq_mayday_lock);
+		raw_spin_lock_irq(&wq_mayday_lock);
 	}
 
-	spin_unlock_irq(&wq_mayday_lock);
+	raw_spin_unlock_irq(&wq_mayday_lock);
 
 	if (should_stop) {
 		__set_current_state(TASK_RUNNING);
@@ -2568,7 +2520,7 @@ static void wq_barrier_func(struct work_struct *work)
  * underneath us, so we can't reliably determine pwq from @target.
  *
  * CONTEXT:
- * spin_lock_irq(pool->lock).
+ * raw_spin_lock_irq(pool->lock).
  */
 static void insert_wq_barrier(struct pool_workqueue *pwq,
 			      struct wq_barrier *barr,
@@ -2655,7 +2607,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
 	for_each_pwq(pwq, wq) {
 		struct worker_pool *pool = pwq->pool;
 
-		spin_lock_irq(&pool->lock);
+		raw_spin_lock_irq(&pool->lock);
 
 		if (flush_color >= 0) {
 			WARN_ON_ONCE(pwq->flush_color != -1);
@@ -2672,7 +2624,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
 			pwq->work_color = work_color;
 		}
 
-		spin_unlock_irq(&pool->lock);
+		raw_spin_unlock_irq(&pool->lock);
 	}
 
 	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
@@ -2872,9 +2824,9 @@ void drain_workqueue(struct workqueue_struct *wq)
 	for_each_pwq(pwq, wq) {
 		bool drained;
 
-		spin_lock_irq(&pwq->pool->lock);
+		raw_spin_lock_irq(&pwq->pool->lock);
 		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
-		spin_unlock_irq(&pwq->pool->lock);
+		raw_spin_unlock_irq(&pwq->pool->lock);
 
 		if (drained)
 			continue;
@@ -2910,7 +2862,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 		return false;
 	}
 
-	spin_lock_irq(&pool->lock);
+	raw_spin_lock_irq(&pool->lock);
 	/* see the comment in try_to_grab_pending() with the same code */
 	pwq = get_work_pwq(work);
 	if (pwq) {
@@ -2926,7 +2878,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 	check_flush_dependency(pwq->wq, work);
 
 	insert_wq_barrier(pwq, barr, work, worker);
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 
 	/*
 	 * Force a lock recursion deadlock when using flush_work() inside a
@@ -2945,7 +2897,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 	rcu_read_unlock();
 	return true;
 already_gone:
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 	rcu_read_unlock();
 	return false;
 }
@@ -3046,7 +2998,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 
 	/* tell other tasks trying to grab @work to back off */
 	mark_work_canceling(work);
-	local_unlock_irqrestore(pendingb_lock, flags);
+	local_irq_restore(flags);
 
 	/*
 	 * This allows canceling during early boot.  We know that @work
@@ -3107,10 +3059,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
  */
 bool flush_delayed_work(struct delayed_work *dwork)
 {
-	local_lock_irq(pendingb_lock);
+	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
 		__queue_work(dwork->cpu, dwork->wq, &dwork->work);
-	local_unlock_irq(pendingb_lock);
+	local_irq_enable();
 	return flush_work(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work);
@@ -3148,7 +3100,7 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork)
 		return false;
 
 	set_work_pool_and_clear_pending(work, get_work_pool_id(work));
-	local_unlock_irqrestore(pendingb_lock, flags);
+	local_irq_restore(flags);
 	return ret;
 }
 
@@ -3258,7 +3210,7 @@ EXPORT_SYMBOL_GPL(execute_in_process_context);
  *
  * Undo alloc_workqueue_attrs().
  */
-void free_workqueue_attrs(struct workqueue_attrs *attrs)
+static void free_workqueue_attrs(struct workqueue_attrs *attrs)
 {
 	if (attrs) {
 		free_cpumask_var(attrs->cpumask);
@@ -3268,21 +3220,20 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs)
 
 /**
  * alloc_workqueue_attrs - allocate a workqueue_attrs
- * @gfp_mask: allocation mask to use
  *
  * Allocate a new workqueue_attrs, initialize with default settings and
  * return it.
  *
  * Return: The allocated new workqueue_attr on success. %NULL on failure.
  */
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
+static struct workqueue_attrs *alloc_workqueue_attrs(void)
 {
 	struct workqueue_attrs *attrs;
 
-	attrs = kzalloc(sizeof(*attrs), gfp_mask);
+	attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
 	if (!attrs)
 		goto fail;
-	if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
+	if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
 		goto fail;
 
 	cpumask_copy(attrs->cpumask, cpu_possible_mask);
@@ -3339,7 +3290,7 @@ static bool wqattrs_equal(const struct workqueue_attrs *a,
  */
 static int init_worker_pool(struct worker_pool *pool)
 {
-	spin_lock_init(&pool->lock);
+	raw_spin_lock_init(&pool->lock);
 	pool->id = -1;
 	pool->cpu = -1;
 	pool->node = NUMA_NO_NODE;
@@ -3360,7 +3311,7 @@ static int init_worker_pool(struct worker_pool *pool)
 	pool->refcnt = 1;
 
 	/* shouldn't fail above this point */
-	pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
+	pool->attrs = alloc_workqueue_attrs();
 	if (!pool->attrs)
 		return -ENOMEM;
 	return 0;
@@ -3425,15 +3376,15 @@ static void put_unbound_pool(struct worker_pool *pool)
 	 * @pool's workers from blocking on attach_mutex.  We're the last
 	 * manager and @pool gets freed with the flag set.
 	 */
-	spin_lock_irq(&pool->lock);
-	wait_event_lock_irq(wq_manager_wait,
+	raw_spin_lock_irq(&pool->lock);
+	swait_event_lock_irq(wq_manager_wait,
 			    !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
 	pool->flags |= POOL_MANAGER_ACTIVE;
 
 	while ((worker = first_idle_worker(pool)))
 		destroy_worker(worker);
 	WARN_ON(pool->nr_workers || pool->nr_idle);
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 
 	mutex_lock(&wq_pool_attach_mutex);
 	if (!list_empty(&pool->workers))
@@ -3587,7 +3538,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 		return;
 
 	/* this function can be called during early boot w/ irq disabled */
-	spin_lock_irqsave(&pwq->pool->lock, flags);
+	raw_spin_lock_irqsave(&pwq->pool->lock, flags);
 
 	/*
 	 * During [un]freezing, the caller is responsible for ensuring that
@@ -3610,7 +3561,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 		pwq->max_active = 0;
 	}
 
-	spin_unlock_irqrestore(&pwq->pool->lock, flags);
+	raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
 }
 
 /* initialize newly alloced @pwq which is associated with @wq and @pool */
@@ -3783,8 +3734,8 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
 
 	ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
 
-	new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
-	tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
+	new_attrs = alloc_workqueue_attrs();
+	tmp_attrs = alloc_workqueue_attrs();
 	if (!ctx || !new_attrs || !tmp_attrs)
 		goto out_free;
 
@@ -3920,7 +3871,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
  *
  * Return: 0 on success and -errno on failure.
  */
-int apply_workqueue_attrs(struct workqueue_struct *wq,
+static int apply_workqueue_attrs(struct workqueue_struct *wq,
 			  const struct workqueue_attrs *attrs)
 {
 	int ret;
@@ -3931,7 +3882,6 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
 
 /**
  * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
@@ -4009,9 +3959,9 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
 
 use_dfl_pwq:
 	mutex_lock(&wq->mutex);
-	spin_lock_irq(&wq->dfl_pwq->pool->lock);
+	raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
 	get_pwq(wq->dfl_pwq);
-	spin_unlock_irq(&wq->dfl_pwq->pool->lock);
+	raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
 	old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
 out_unlock:
 	mutex_unlock(&wq->mutex);
@@ -4130,7 +4080,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 		return NULL;
 
 	if (flags & WQ_UNBOUND) {
-		wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
+		wq->unbound_attrs = alloc_workqueue_attrs();
 		if (!wq->unbound_attrs)
 			goto err_free_wq;
 	}
@@ -4399,10 +4349,10 @@ unsigned int work_busy(struct work_struct *work)
 	rcu_read_lock();
 	pool = get_work_pool(work);
 	if (pool) {
-		spin_lock_irqsave(&pool->lock, flags);
+		raw_spin_lock_irqsave(&pool->lock, flags);
 		if (find_worker_executing_work(pool, work))
 			ret |= WORK_BUSY_RUNNING;
-		spin_unlock_irqrestore(&pool->lock, flags);
+		raw_spin_unlock_irqrestore(&pool->lock, flags);
 	}
 	rcu_read_unlock();
 
@@ -4608,10 +4558,10 @@ void show_workqueue_state(void)
 		pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
 
 		for_each_pwq(pwq, wq) {
-			spin_lock_irqsave(&pwq->pool->lock, flags);
+			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
 			if (pwq->nr_active || !list_empty(&pwq->delayed_works))
 				show_pwq(pwq);
-			spin_unlock_irqrestore(&pwq->pool->lock, flags);
+			raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
 			/*
 			 * We could be printing a lot from atomic context, e.g.
 			 * sysrq-t -> show_workqueue_state(). Avoid triggering
@@ -4625,7 +4575,7 @@ void show_workqueue_state(void)
 		struct worker *worker;
 		bool first = true;
 
-		spin_lock_irqsave(&pool->lock, flags);
+		raw_spin_lock_irqsave(&pool->lock, flags);
 		if (pool->nr_workers == pool->nr_idle)
 			goto next_pool;
 
@@ -4644,7 +4594,7 @@ void show_workqueue_state(void)
 		}
 		pr_cont("\n");
 	next_pool:
-		spin_unlock_irqrestore(&pool->lock, flags);
+		raw_spin_unlock_irqrestore(&pool->lock, flags);
 		/*
 		 * We could be printing a lot from atomic context, e.g.
 		 * sysrq-t -> show_workqueue_state(). Avoid triggering
@@ -4674,7 +4624,7 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
 		struct worker_pool *pool = worker->pool;
 
 		if (pool) {
-			spin_lock_irq(&pool->lock);
+			raw_spin_lock_irq(&pool->lock);
 			/*
 			 * ->desc tracks information (wq name or
 			 * set_worker_desc()) for the latest execution.  If
@@ -4688,7 +4638,7 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
 					scnprintf(buf + off, size - off, "-%s",
 						  worker->desc);
 			}
-			spin_unlock_irq(&pool->lock);
+			raw_spin_unlock_irq(&pool->lock);
 		}
 	}
 
@@ -4719,7 +4669,7 @@ static void unbind_workers(int cpu)
 
 	for_each_cpu_worker_pool(pool, cpu) {
 		mutex_lock(&wq_pool_attach_mutex);
-		spin_lock_irq(&pool->lock);
+		raw_spin_lock_irq(&pool->lock);
 
 		/*
 		 * We've blocked all attach/detach operations. Make all workers
@@ -4733,7 +4683,7 @@ static void unbind_workers(int cpu)
 
 		pool->flags |= POOL_DISASSOCIATED;
 
-		spin_unlock_irq(&pool->lock);
+		raw_spin_unlock_irq(&pool->lock);
 		mutex_unlock(&wq_pool_attach_mutex);
 
 		/*
@@ -4759,9 +4709,9 @@ static void unbind_workers(int cpu)
 		 * worker blocking could lead to lengthy stalls.  Kick off
 		 * unbound chain execution of currently pending work items.
 		 */
-		spin_lock_irq(&pool->lock);
+		raw_spin_lock_irq(&pool->lock);
 		wake_up_worker(pool);
-		spin_unlock_irq(&pool->lock);
+		raw_spin_unlock_irq(&pool->lock);
 	}
 }
 
@@ -4788,7 +4738,7 @@ static void rebind_workers(struct worker_pool *pool)
 		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
 						  pool->attrs->cpumask) < 0);
 
-	spin_lock_irq(&pool->lock);
+	raw_spin_lock_irq(&pool->lock);
 
 	pool->flags &= ~POOL_DISASSOCIATED;
 
@@ -4827,7 +4777,7 @@ static void rebind_workers(struct worker_pool *pool)
 		WRITE_ONCE(worker->flags, worker_flags);
 	}
 
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 }
 
 /**
@@ -5279,7 +5229,7 @@ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
 
 	lockdep_assert_held(&wq_pool_mutex);
 
-	attrs = alloc_workqueue_attrs(GFP_KERNEL);
+	attrs = alloc_workqueue_attrs();
 	if (!attrs)
 		return NULL;
 
@@ -5701,7 +5651,7 @@ static void __init wq_numa_init(void)
 		return;
 	}
 
-	wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
+	wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
 	BUG_ON(!wq_update_unbound_numa_attrs_buf);
 
 	/*
@@ -5776,7 +5726,7 @@ int __init workqueue_init_early(void)
 	for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
 		struct workqueue_attrs *attrs;
 
-		BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+		BUG_ON(!(attrs = alloc_workqueue_attrs()));
 		attrs->nice = std_nice[i];
 		unbound_std_wq_attrs[i] = attrs;
 
@@ -5785,7 +5735,7 @@ int __init workqueue_init_early(void)
 		 * guaranteed by max_active which is enforced by pwqs.
 		 * Turn off NUMA so that dfl_pwq is used for all nodes.
 		 */
-		BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+		BUG_ON(!(attrs = alloc_workqueue_attrs()));
 		attrs->nice = std_nice[i];
 		attrs->no_numa = true;
 		ordered_wq_attrs[i] = attrs;
-- 
2.20.1
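
For readers skimming the conversion above, here is a minimal sketch of the locking pattern
the rework applies, assuming a PREEMPT_RT kernel where spinlock_t becomes a sleeping lock:
pool->lock turns into a raw_spinlock_t so it can still be taken with interrupts hard-disabled,
and the manager handoff moves to a simple waitqueue so the wakeup stays legal under that raw
lock. The demo_* structure and function names below are made up purely for illustration and
are not part of the patch; swait_event_lock_irq() is the helper this series adds to
include/linux/swait.h, the other calls are stock kernel APIs.

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/swait.h>

	#define DEMO_MANAGER_ACTIVE	0x01

	struct demo_pool {
		raw_spinlock_t		lock;		/* stays a true spinning lock on RT */
		struct list_head	idle_list;
		unsigned int		flags;
	};

	static DECLARE_SWAIT_QUEUE_HEAD(demo_manager_wait);

	static void demo_pool_init(struct demo_pool *pool)
	{
		raw_spin_lock_init(&pool->lock);
		INIT_LIST_HEAD(&pool->idle_list);
		pool->flags = 0;
	}

	/* hot path: runs with interrupts off, so only a raw lock is legal on RT */
	static void demo_enter_idle(struct demo_pool *pool, struct list_head *entry)
	{
		raw_spin_lock_irq(&pool->lock);
		list_add(entry, &pool->idle_list);	/* no rt_lock_idle_list() wrapper needed */
		raw_spin_unlock_irq(&pool->lock);
	}

	/* waker side: swake_up_one() may be called while holding a raw spinlock */
	static void demo_release_manager(struct demo_pool *pool)
	{
		raw_spin_lock_irq(&pool->lock);
		pool->flags &= ~DEMO_MANAGER_ACTIVE;
		swake_up_one(&demo_manager_wait);
		raw_spin_unlock_irq(&pool->lock);
	}

	/* waiter side, mirroring put_unbound_pool(): the helper drops pool->lock while sleeping */
	static void demo_claim_manager(struct demo_pool *pool)
	{
		raw_spin_lock_irq(&pool->lock);
		swait_event_lock_irq(demo_manager_wait,
				     !(pool->flags & DEMO_MANAGER_ACTIVE), pool->lock);
		pool->flags |= DEMO_MANAGER_ACTIVE;
		raw_spin_unlock_irq(&pool->lock);
	}

On a !PREEMPT_RT build raw_spinlock_t behaves the same as spinlock_t, so the conversion is a
no-op there; it only changes behavior on RT, where plain spinlock_t becomes a sleeping rtmutex
and could not be taken from the hard-irq and irqs-off paths shown in the diff.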


