* [ANNOUNCE] v5.4.17-rt9
From: Sebastian Andrzej Siewior @ 2020-02-04 16:58 UTC
  To: Thomas Gleixner; +Cc: LKML, linux-rt-users, Steven Rostedt

Dear RT folks!

I'm pleased to announce the v5.4.17-rt9 patch set. 

Changes since v5.4.17-rt8:

  - A rework of percpu-rwsem locking. The fs core acquires a
    percpu-rwsem during `fsfreeze' and returns to userland with the
    lock still held, which led to lockdep warnings. On !RT a lockdep
    annotation avoided the warnings, but the same trick did not work on
    RT. Reported by Juri Lelli, patch(es) by Peter Zijlstra. See the
    sketch after this list.

  - Include a header file for the `current' macro so an allmod build on
    ARM does not break.

  - A tweak to migrate_enable() so it does not have to wait until
    stop_one_cpu_nowait() finishes in case the CPU mask changed during
    migrate_disable() and the task has to be moved to another CPU.
    Patch by Scott Wood.

  - Drop a lock earlier in mm/memcontrol. Not a bug, but there is no
    need for the additional locked section. Patch by Matt Fleming.
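
The fsfreeze pattern at issue, as a simplified sketch (not the literal
fs/super.c code: the single freeze level and the two function names are
illustrative, while percpu_down_write(), percpu_rwsem_release() and
percpu_rwsem_acquire() are the real helpers reworked in the patch
below):

	/* ioctl(FIFREEZE): take the write side, then return to userland */
	static int freeze_sketch(struct super_block *sb)
	{
		percpu_down_write(&sb->s_writers.rw_sem[0]);
		/* tell lockdep the lock is "released" before returning */
		percpu_rwsem_release(&sb->s_writers.rw_sem[0], false, _THIS_IP_);
		return 0;	/* still write-locked, owner now in userland */
	}

	/* ioctl(FITHAW), possibly issued by a different task */
	static int thaw_sketch(struct super_block *sb)
	{
		percpu_rwsem_acquire(&sb->s_writers.rw_sem[0], false, _THIS_IP_);
		percpu_up_write(&sb->s_writers.rw_sem[0]);
		return 0;
	}

On !RT the old code got away with this by setting the embedded rwsem's
owner to RWSEM_OWNER_UNKNOWN (see the hunk deleting that constant
below); the rtmutex-based rwsem on RT has no such escape hatch, which
is why the rwsem is removed from struct percpu_rw_semaphore altogether.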

Known issues
     - None

The delta patch against v5.4.17-rt8 is appended below and can be found here:
 
     https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.4/incr/patch-5.4.17-rt8-rt9.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.4.17-rt9

The RT patch against v5.4.17 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.4/older/patch-5.4.17-rt9.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.4/older/patches-5.4.17-rt9.tar.xz

Sebastian

diff --git a/include/linux/locallock.h b/include/linux/locallock.h
index 05a15110c8aa7..9b6b4def52d49 100644
--- a/include/linux/locallock.h
+++ b/include/linux/locallock.h
@@ -3,6 +3,7 @@
 
 #include <linux/percpu.h>
 #include <linux/spinlock.h>
+#include <asm/current.h>
 
 #ifdef CONFIG_PREEMPT_RT
 
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 9fa9a1dd9a11a..6e8d3871450a4 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -3,41 +3,52 @@
 #define _LINUX_PERCPU_RWSEM_H
 
 #include <linux/atomic.h>
-#include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/rcuwait.h>
+#include <linux/wait.h>
 #include <linux/rcu_sync.h>
 #include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
 	unsigned int __percpu	*read_count;
-	struct rw_semaphore	rw_sem; /* slowpath */
-	struct rcuwait          writer; /* blocked writer */
-	int			readers_block;
+	struct rcuwait		writer;
+	wait_queue_head_t	waiters;
+	atomic_t		block;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 };
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname)	.dep_map = { .name = #lockname },
+#else
+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname)
+#endif
+
 #define __DEFINE_PERCPU_RWSEM(name, is_static)				\
 static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name);		\
 is_static struct percpu_rw_semaphore name = {				\
 	.rss = __RCU_SYNC_INITIALIZER(name.rss),			\
 	.read_count = &__percpu_rwsem_rc_##name,			\
-	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
 	.writer = __RCUWAIT_INITIALIZER(name.writer),			\
+	.waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters),		\
+	.block = ATOMIC_INIT(0),					\
+	__PERCPU_RWSEM_DEP_MAP_INIT(name)				\
 }
+
 #define DEFINE_PERCPU_RWSEM(name)		\
 	__DEFINE_PERCPU_RWSEM(name, /* not static */)
 #define DEFINE_STATIC_PERCPU_RWSEM(name)	\
 	__DEFINE_PERCPU_RWSEM(name, static)
 
-extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
-extern void __percpu_up_read(struct percpu_rw_semaphore *);
+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
 
 static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 {
 	might_sleep();
 
-	rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
 
 	preempt_disable();
 	/*
@@ -48,8 +59,9 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 	 * and that once the synchronize_rcu() is done, the writer will see
 	 * anything we did within this RCU-sched read-size critical section.
 	 */
-	__this_cpu_inc(*sem->read_count);
-	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+	if (likely(rcu_sync_is_idle(&sem->rss)))
+		__this_cpu_inc(*sem->read_count);
+	else
 		__percpu_down_read(sem, false); /* Unconditional memory barrier */
 	/*
 	 * The preempt_enable() prevents the compiler from
@@ -58,16 +70,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 	preempt_enable();
 }
 
-static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
-	int ret = 1;
+	bool ret = true;
 
 	preempt_disable();
 	/*
 	 * Same as in percpu_down_read().
 	 */
-	__this_cpu_inc(*sem->read_count);
-	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+	if (likely(rcu_sync_is_idle(&sem->rss)))
+		__this_cpu_inc(*sem->read_count);
+	else
 		ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
 	preempt_enable();
 	/*
@@ -76,24 +89,36 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 	 */
 
 	if (ret)
-		rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
+		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
 
 	return ret;
 }
 
 static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 {
+	rwsem_release(&sem->dep_map, 1, _RET_IP_);
+
 	preempt_disable();
 	/*
 	 * Same as in percpu_down_read().
 	 */
-	if (likely(rcu_sync_is_idle(&sem->rss)))
+	if (likely(rcu_sync_is_idle(&sem->rss))) {
 		__this_cpu_dec(*sem->read_count);
-	else
-		__percpu_up_read(sem); /* Unconditional memory barrier */
+	} else {
+		/*
+		 * slowpath; reader will only ever wake a single blocked
+		 * writer.
+		 */
+		smp_mb(); /* B matches C */
+		/*
+		 * In other words, if they see our decrement (presumably to
+		 * aggregate zero, as that is the only time it matters) they
+		 * will also see our critical section.
+		 */
+		__this_cpu_dec(*sem->read_count);
+		rcuwait_wake_up(&sem->writer);
+	}
 	preempt_enable();
-
-	rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
 }
 
 extern void percpu_down_write(struct percpu_rw_semaphore *);
@@ -110,29 +135,19 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 	__percpu_init_rwsem(sem, #sem, &rwsem_key);		\
 })
 
-#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
-
-#define percpu_rwsem_assert_held(sem)				\
-	lockdep_assert_held(&(sem)->rw_sem)
+#define percpu_rwsem_is_held(sem) lockdep_is_held(sem)
+#define percpu_rwsem_assert_held(sem)  lockdep_assert_held(sem)
 
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
 					bool read, unsigned long ip)
 {
-	lock_release(&sem->rw_sem.dep_map, 1, ip);
-#if defined(CONFIG_RWSEM_SPIN_ON_OWNER) && !defined(CONFIG_PREEMPT_RT)
-	if (!read)
-		atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
-#endif
+	lock_release(&sem->dep_map, 1, ip);
 }
 
 static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
 					bool read, unsigned long ip)
 {
-	lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
-#if defined(CONFIG_RWSEM_SPIN_ON_OWNER) && !defined(CONFIG_PREEMPT_RT)
-	if (!read)
-		atomic_long_set(&sem->rw_sem.owner, (long)current);
-#endif
+	lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
 }
 
 #endif
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index b240dd987834f..393f4520befc9 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -58,12 +58,6 @@ struct rw_semaphore {
 #endif
 };
 
-/*
- * Setting all bits of the owner field except bit 0 will indicate
- * that the rwsem is writer-owned with an unknown owner.
- */
-#define RWSEM_OWNER_UNKNOWN	(-2L)
-
 /* In all implementations count != 0 means locked */
 static inline int rwsem_is_locked(struct rw_semaphore *sem)
 {
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2090f205b5c68..1781c47d81f0b 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -21,6 +21,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int
 #define WQ_FLAG_EXCLUSIVE	0x01
 #define WQ_FLAG_WOKEN		0x02
 #define WQ_FLAG_BOOKMARK	0x04
+#define WQ_FLAG_CUSTOM		0x08
 
 /*
  * A single wait-queue entry structure:
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a24fd4763263e..368309e9a1f52 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void)
 
 static void lockdep_acquire_cpus_lock(void)
 {
-	rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
+	rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
 }
 
 static void lockdep_release_cpus_lock(void)
 {
-	rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
+	rwsem_release(&cpu_hotplug_lock.dep_map, 1, _THIS_IP_);
 }
 
 /*
diff --git a/kernel/exit.c b/kernel/exit.c
index d659c84716b46..dcb1f1182101e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -258,6 +258,7 @@ void rcuwait_wake_up(struct rcuwait *w)
 		wake_up_process(task);
 	rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(rcuwait_wake_up);
 
 /*
  * Determine if a process group is "orphaned", according to the POSIX
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 364d38a0c4441..e4a4a56037bb2 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,27 +1,29 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/atomic.h>
-#include <linux/rwsem.h>
 #include <linux/percpu.h>
+#include <linux/wait.h>
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/sched/task.h>
 #include <linux/errno.h>
 
-#include "rwsem.h"
-
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
-			const char *name, struct lock_class_key *rwsem_key)
+			const char *name, struct lock_class_key *key)
 {
 	sem->read_count = alloc_percpu(int);
 	if (unlikely(!sem->read_count))
 		return -ENOMEM;
 
-	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
 	rcu_sync_init(&sem->rss);
-	__init_rwsem(&sem->rw_sem, name, rwsem_key);
 	rcuwait_init(&sem->writer);
-	sem->readers_block = 0;
+	init_waitqueue_head(&sem->waiters);
+	atomic_set(&sem->block, 0);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+	lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
@@ -41,73 +43,139 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
 }
 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
-int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
+static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
+	__this_cpu_inc(*sem->read_count);
+
 	/*
 	 * Due to having preemption disabled the decrement happens on
 	 * the same CPU as the increment, avoiding the
 	 * increment-on-one-CPU-and-decrement-on-another problem.
 	 *
-	 * If the reader misses the writer's assignment of readers_block, then
-	 * the writer is guaranteed to see the reader's increment.
+	 * If the reader misses the writer's assignment of sem->block, then the
+	 * writer is guaranteed to see the reader's increment.
 	 *
 	 * Conversely, any readers that increment their sem->read_count after
-	 * the writer looks are guaranteed to see the readers_block value,
-	 * which in turn means that they are guaranteed to immediately
-	 * decrement their sem->read_count, so that it doesn't matter that the
-	 * writer missed them.
+	 * the writer looks are guaranteed to see the sem->block value, which
+	 * in turn means that they are guaranteed to immediately decrement
+	 * their sem->read_count, so that it doesn't matter that the writer
+	 * missed them.
 	 */
 
 	smp_mb(); /* A matches D */
 
 	/*
-	 * If !readers_block the critical section starts here, matched by the
+	 * If !sem->block the critical section starts here, matched by the
 	 * release in percpu_up_write().
 	 */
-	if (likely(!smp_load_acquire(&sem->readers_block)))
-		return 1;
+	if (likely(!atomic_read_acquire(&sem->block)))
+		return true;
 
-	/*
-	 * Per the above comment; we still have preemption disabled and
-	 * will thus decrement on the same CPU as we incremented.
-	 */
-	__percpu_up_read(sem);
-
-	if (try)
-		return 0;
-
-	/*
-	 * We either call schedule() in the wait, or we'll fall through
-	 * and reschedule on the preempt_enable() in percpu_down_read().
-	 */
-	preempt_enable_no_resched();
-
-	/*
-	 * Avoid lockdep for the down/up_read() we already have them.
-	 */
-	__down_read(&sem->rw_sem);
-	this_cpu_inc(*sem->read_count);
-	__up_read(&sem->rw_sem);
-
-	preempt_disable();
-	return 1;
-}
-EXPORT_SYMBOL_GPL(__percpu_down_read);
-
-void __percpu_up_read(struct percpu_rw_semaphore *sem)
-{
-	smp_mb(); /* B matches C */
-	/*
-	 * In other words, if they see our decrement (presumably to aggregate
-	 * zero, as that is the only time it matters) they will also see our
-	 * critical section.
-	 */
 	__this_cpu_dec(*sem->read_count);
 
-	/* Prod writer to recheck readers_active */
+	/* Prod writer to re-evaluate readers_active_check() */
 	rcuwait_wake_up(&sem->writer);
+
+	return false;
 }
-EXPORT_SYMBOL_GPL(__percpu_up_read);
+
+static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
+{
+	if (atomic_read(&sem->block))
+		return false;
+
+	return atomic_xchg(&sem->block, 1) == 0;
+}
+
+static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
+{
+	if (reader) {
+		bool ret;
+
+		preempt_disable();
+		ret = __percpu_down_read_trylock(sem);
+		preempt_enable();
+
+		return ret;
+	}
+	return __percpu_down_write_trylock(sem);
+}
+
+/*
+ * The return value of wait_queue_entry::func means:
+ *
+ *  <0 - error, wakeup is terminated and the error is returned
+ *   0 - no wakeup, a next waiter is tried
+ *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
+ *
+ * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
+ * and play games with the return value to allow waking multiple readers.
+ *
+ * Specifically, we wake readers until we've woken a single writer, or until a
+ * trylock fails.
+ */
+static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
+				      unsigned int mode, int wake_flags,
+				      void *key)
+{
+	struct task_struct *p = get_task_struct(wq_entry->private);
+	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
+	struct percpu_rw_semaphore *sem = key;
+
+	/* concurrent against percpu_down_write(), can get stolen */
+	if (!__percpu_rwsem_trylock(sem, reader))
+		return 1;
+
+	list_del_init(&wq_entry->entry);
+	smp_store_release(&wq_entry->private, NULL);
+
+	wake_up_process(p);
+	put_task_struct(p);
+
+	return !reader; /* wake (readers until) 1 writer */
+}
+
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
+{
+	DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
+	bool wait;
+
+	spin_lock_irq(&sem->waiters.lock);
+	/*
+	 * Serialize against the wakeup in percpu_up_write(), if we fail
+	 * the trylock, the wakeup must see us on the list.
+	 */
+	wait = !__percpu_rwsem_trylock(sem, reader);
+	if (wait) {
+		wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
+		__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
+	}
+	spin_unlock_irq(&sem->waiters.lock);
+
+	while (wait) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!smp_load_acquire(&wq_entry.private))
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+{
+	if (__percpu_down_read_trylock(sem))
+		return true;
+
+	if (try)
+		return false;
+
+	preempt_enable();
+	percpu_rwsem_wait(sem, /* .reader = */ true);
+	preempt_disable();
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(__percpu_down_read);
 
 #define per_cpu_sum(var)						\
 ({									\
@@ -124,6 +192,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read);
  * zero.  If this sum is zero, then it is stable due to the fact that if any
  * newly arriving readers increment a given counter, they will immediately
  * decrement that same counter.
+ *
+ * Assumes sem->block is set.
  */
 static bool readers_active_check(struct percpu_rw_semaphore *sem)
 {
@@ -142,32 +212,36 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem)
 
 void percpu_down_write(struct percpu_rw_semaphore *sem)
 {
+	might_sleep();
+	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
 	/* Notify readers to take the slow path. */
 	rcu_sync_enter(&sem->rss);
 
-	down_write(&sem->rw_sem);
+	/*
+	 * Try set sem->block; this provides writer-writer exclusion.
+	 * Having sem->block set makes new readers block.
+	 */
+	if (!__percpu_down_write_trylock(sem))
+		percpu_rwsem_wait(sem, /* .reader = */ false);
+
+	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
 
 	/*
-	 * Notify new readers to block; up until now, and thus throughout the
-	 * longish rcu_sync_enter() above, new readers could still come in.
-	 */
-	WRITE_ONCE(sem->readers_block, 1);
-
-	smp_mb(); /* D matches A */
-
-	/*
-	 * If they don't see our writer of readers_block, then we are
-	 * guaranteed to see their sem->read_count increment, and therefore
-	 * will wait for them.
+	 * If they don't see our store of sem->block, then we are guaranteed to
+	 * see their sem->read_count increment, and therefore will wait for
+	 * them.
 	 */
 
-	/* Wait for all now active readers to complete. */
+	/* Wait for all active readers to complete. */
 	rcuwait_wait_event(&sem->writer, readers_active_check(sem));
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
 void percpu_up_write(struct percpu_rw_semaphore *sem)
 {
+	rwsem_release(&sem->dep_map, 1, _RET_IP_);
+
 	/*
 	 * Signal the writer is done, no fast path yet.
 	 *
@@ -178,12 +252,12 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
 	 * Therefore we force it through the slow path which guarantees an
 	 * acquire and thereby guarantees the critical section's consistency.
 	 */
-	smp_store_release(&sem->readers_block, 0);
+	atomic_set_release(&sem->block, 0);
 
 	/*
-	 * Release the write lock, this will allow readers back in the game.
+	 * Prod any pending reader/writer to make progress.
 	 */
-	up_write(&sem->rw_sem);
+	__wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
 
 	/*
 	 * Once this completes (at least one RCU-sched grace period hence) the
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 7d566aa1ab429..0d11ba11a32ac 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -28,8 +28,6 @@
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
 
-#include "rwsem.h"
-
 #ifndef CONFIG_PREEMPT_RT
 #include "lock_events.h"
 
@@ -662,8 +660,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
 	unsigned long flags;
 	bool ret = true;
 
-	BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
-
 	if (need_resched()) {
 		lockevent_inc(rwsem_opt_fail);
 		return false;
@@ -1341,7 +1337,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 /*
  * lock for reading
  */
-inline void __down_read(struct rw_semaphore *sem)
+static inline void __down_read(struct rw_semaphore *sem)
 {
 	if (!rwsem_read_trylock(sem)) {
 		rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
@@ -1429,7 +1425,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
 /*
  * unlock after reading
  */
-inline void __up_read(struct rw_semaphore *sem)
+static inline void __up_read(struct rw_semaphore *sem)
 {
 	long tmp;
 
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index cd7820880a4f4..e69de29bb2d1d 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __INTERNAL_RWSEM_H
-#define __INTERNAL_RWSEM_H
-#include <linux/rwsem.h>
-
-#ifndef CONFIG_PREEMPT_RT
-extern void __down_read(struct rw_semaphore *sem);
-extern void __up_read(struct rw_semaphore *sem);
-#endif
-
-#endif /* __INTERNAL_RWSEM_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 754f6afb438d8..ea05364619819 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8189,6 +8189,9 @@ static void migrate_disabled_sched(struct task_struct *p)
 	p->migrate_disable_scheduled = 1;
 }
 
+static DEFINE_PER_CPU(struct cpu_stop_work, migrate_work);
+static DEFINE_PER_CPU(struct migration_arg, migrate_arg);
+
 void migrate_enable(void)
 {
 	struct task_struct *p = current;
@@ -8227,22 +8230,24 @@ void migrate_enable(void)
 
 	WARN_ON(smp_processor_id() != cpu);
 	if (!is_cpu_allowed(p, cpu)) {
-		struct migration_arg arg = { .task = p };
-		struct cpu_stop_work work;
+		struct migration_arg __percpu *arg;
+		struct cpu_stop_work __percpu *work;
 		struct rq_flags rf;
 
+		work = this_cpu_ptr(&migrate_work);
+		arg = this_cpu_ptr(&migrate_arg);
+		WARN_ON_ONCE(!arg->done && !work->disabled && work->arg);
+
+		arg->task = p;
+		arg->done = false;
+
 		rq = task_rq_lock(p, &rf);
 		update_rq_clock(rq);
-		arg.dest_cpu = select_fallback_rq(cpu, p);
+		arg->dest_cpu = select_fallback_rq(cpu, p);
 		task_rq_unlock(rq, p, &rf);
 
 		stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
-				    &arg, &work);
-		__schedule(true);
-		if (!work.disabled) {
-			while (!arg.done)
-				cpu_relax();
-		}
+				    arg, work);
 	}
 
 out:
diff --git a/localversion-rt b/localversion-rt
index 700c857efd9ba..22746d6390a42 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt8
+-rt9
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 953e106d6cb2d..1e3d9047474ba 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7043,10 +7043,10 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 	mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page),
 				     -nr_entries);
 	memcg_check_events(memcg, page);
+	local_unlock_irqrestore(event_lock, flags);
 
 	if (!mem_cgroup_is_root(memcg))
 		css_put_many(&memcg->css, nr_entries);
-	local_unlock_irqrestore(event_lock, flags);
 }
 
 /**


* Re: [ANNOUNCE] v5.4.17-rt9
From: Fernando Lopez-Lezcano @ 2020-02-06 23:59 UTC
  To: Sebastian Andrzej Siewior, Thomas Gleixner
  Cc: nando, LKML, linux-rt-users, Steven Rostedt

On 2/4/20 8:58 AM, Sebastian Andrzej Siewior wrote:
> Dear RT folks!
> 
> I'm pleased to announce the v5.4.17-rt9 patch set.

Thanks!
I see a continuous stream of these:

----
[165992.032318] BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:973
[165992.032320] in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 12209, name: kworker/u8:3
[165992.032322] INFO: lockdep is turned off.
[165992.032322] irq event stamp: 0
[165992.032323] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
[165992.032325] hardirqs last disabled at (0): [<ffffffff980ed372>] copy_process+0x802/0x2000
[165992.032329] softirqs last  enabled at (0): [<ffffffff980ed372>] copy_process+0x802/0x2000
[165992.032331] softirqs last disabled at (0): [<0000000000000000>] 0x0
[165992.032332] CPU: 2 PID: 12209 Comm: kworker/u8:3 Tainted: G        W          5.4.17-100.rt9.1.fc30.ccrma.x86_64+rt #1
[165992.032333] Hardware name:  /NUC6i7KYB, BIOS KYSKLi70.86A.0034.2016.0503.1003 05/03/2016
[165992.032369] Workqueue: i915 retire_work_handler [i915]
[165992.032369] Call Trace:
[165992.032373]  dump_stack+0x8f/0xd0
[165992.032377]  ___might_sleep.cold+0xb3/0xc3
[165992.032380]  rt_spin_lock_nested+0x8e/0xd0
[165992.032382]  ? __i915_sw_fence_complete+0x15c/0x200 [i915]
[165992.032407]  __i915_sw_fence_complete+0x15c/0x200 [i915]
[165992.032432]  __i915_request_queue+0x19/0x50 [i915]
[165992.032463]  __engine_park+0xe2/0x1d0 [i915]
[165992.032489]  ____intel_wakeref_put_last+0x1e/0x50 [i915]
[165992.032511]  i915_request_retire+0x293/0x480 [i915]
[165992.032541]  retire_requests+0x54/0x60 [i915]
[165992.032569]  i915_retire_requests+0xea/0x220 [i915]
[165992.032598]  retire_work_handler+0x56/0x60 [i915]
[165992.032626]  process_one_work+0x261/0x6c0
[165992.032631]  worker_thread+0x50/0x3b0
[165992.032634]  kthread+0x106/0x140
[165992.032636]  ? process_one_work+0x6c0/0x6c0
[165992.032638]  ? kthread_park+0x90/0x90
[165992.032640]  ret_from_fork+0x3a/0x50
----

Something similar used to happen a while back, but I had not seen it on
5.2.x rt-patched kernels. It is back (or something very similar).

This is on Fedora 30 upgraded to the latest, and the kernel is based on 
(as I normally do) the source package for the equivalent Fedora kernel, 
with the RT patch added (and configured).

Thanks again for your work!
-- Fernando


> Changes since v5.4.17-rt8:
> 
>    - A rework of percpu-rwsem locking. The fs core acquires a
>      percpu-rwsem during `fsfreeze' and returns to userland with the
>      lock still held, which led to lockdep warnings. On !RT a lockdep
>      annotation avoided the warnings, but the same trick did not work
>      on RT. Reported by Juri Lelli, patch(es) by Peter Zijlstra.
> 
>    - Include a header file for the `current' macro so an allmod build
>      on ARM does not break.
> 
>    - A tweak to migrate_enable() so it does not have to wait until
>      stop_one_cpu_nowait() finishes in case the CPU mask changed
>      during migrate_disable() and the task has to be moved to another
>      CPU. Patch by Scott Wood.
> 
>    - Drop a lock earlier in mm/memcontrol. Not a bug, but there is no
>      need for the additional locked section. Patch by Matt Fleming.
> 
> Known issues
>       - None



* Re: [ANNOUNCE] v5.4.17-rt9
From: Mike Galbraith @ 2020-02-07  6:11 UTC
  To: Fernando Lopez-Lezcano, Sebastian Andrzej Siewior, Thomas Gleixner
  Cc: LKML, linux-rt-users, Steven Rostedt

On Thu, 2020-02-06 at 15:59 -0800, Fernando Lopez-Lezcano wrote:
> On 2/4/20 8:58 AM, Sebastian Andrzej Siewior wrote:
> > Dear RT folks!
> >
> > I'm pleased to announce the v5.4.17-rt9 patch set.
>
> Thanks!
> I see a continuous stream of these:

(snips gripage)

Yup, d67739268cf0 annoys RT locking if lockdep is enabled.  The below
shut it up for my i915-equipped lappy.

drm/i915/gt: use a LOCAL_IRQ_LOCK in __timeline_mark_lock()

Quoting drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe

    We use a fake timeline->mutex lock to reassure lockdep that the timeline
    is always locked when emitting requests. However, the use inside
    __engine_park() may be inside hardirq and so lockdep now complains about
    the mixed irq-state of the nested lock. Disable irqs around the
    lockdep tracking to keep it happy.

This lockdep appeasement breaks RT because we take sleeping locks between
__timeline_mark_lock()/unlock().  Use a LOCAL_IRQ_LOCK instead.

Signed-off-by: Mike Galbraith <efault@gmx.de>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c |    7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -38,12 +38,15 @@ static int __engine_unpark(struct intel_
 }

 #if IS_ENABLED(CONFIG_LOCKDEP)
+#include <linux/locallock.h>
+
+static DEFINE_LOCAL_IRQ_LOCK(timeline_lock);

 static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
 {
 	unsigned long flags;

-	local_irq_save(flags);
+	local_lock_irqsave(timeline_lock, flags);
 	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

 	return flags;
@@ -53,7 +56,7 @@ static inline void __timeline_mark_unloc
 					  unsigned long flags)
 {
 	mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(timeline_lock, flags);
 }

 #else



* Re: [ANNOUNCE] v5.4.17-rt9
From: Fernando Lopez-Lezcano @ 2020-02-07  6:13 UTC
  To: Mike Galbraith, Sebastian Andrzej Siewior, Thomas Gleixner
  Cc: nando, LKML, linux-rt-users, Steven Rostedt

On 2/6/20 10:11 PM, Mike Galbraith wrote:
> On Thu, 2020-02-06 at 15:59 -0800, Fernando Lopez-Lezcano wrote:
>> On 2/4/20 8:58 AM, Sebastian Andrzej Siewior wrote:
>>> Dear RT folks!
>>>
>>> I'm pleased to announce the v5.4.17-rt9 patch set.
>>
>> Thanks!
>> I see a continuous stream of these:
> 
> (snips gripage)
> 
> Yup, d67739268cf0 annoys RT locking if lockdep is enabled.  The below
> shut it up for my i915 equipped lappy.

Wow, Mike, thanks! Will try it out and report...
(might take me a couple of days)

-- Fernando


> drm/i915/gt: use a LOCAL_IRQ_LOCK in __timeline_mark_lock()
> 
> Quoting drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe
> 
>      We use a fake timeline->mutex lock to reassure lockdep that the timeline
>      is always locked when emitting requests. However, the use inside
>      __engine_park() may be inside hardirq and so lockdep now complains about
>      the mixed irq-state of the nested locked. Disable irqs around the
>      lockdep tracking to keep it happy.
> 
> This lockdep appeasement breaks RT because we take sleeping locks between
> __timeline_mark_lock()/unlock().  Use a LOCAL_IRQ_LOCK instead.
> 
> Signed-off-by: Mike Galbraith <efault@gmx.de>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c |    7 +++++--
>   1 file changed, 5 insertions(+), 2 deletions(-)
> 
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -38,12 +38,15 @@ static int __engine_unpark(struct intel_
>   }
> 
>   #if IS_ENABLED(CONFIG_LOCKDEP)
> +#include <linux/locallock.h>
> +
> +static DEFINE_LOCAL_IRQ_LOCK(timeline_lock);
> 
>   static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
>   {
>   	unsigned long flags;
> 
> -	local_irq_save(flags);
> +	local_lock_irqsave(timeline_lock, flags);
>   	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
> 
>   	return flags;
> @@ -53,7 +56,7 @@ static inline void __timeline_mark_unloc
>   					  unsigned long flags)
>   {
>   	mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
> -	local_irq_restore(flags);
> +	local_unlock_irqrestore(timeline_lock, flags);
>   }
> 
>   #else
> 
> 




* Re: [ANNOUNCE] v5.4.17-rt9
From: Mike Galbraith @ 2020-02-07  6:56 UTC
  To: Fernando Lopez-Lezcano, Sebastian Andrzej Siewior, Thomas Gleixner
  Cc: LKML, linux-rt-users, Steven Rostedt

On Thu, 2020-02-06 at 22:13 -0800, Fernando Lopez-Lezcano wrote:
> On 2/6/20 10:11 PM, Mike Galbraith wrote:
> > On Thu, 2020-02-06 at 15:59 -0800, Fernando Lopez-Lezcano wrote:
> >> On 2/4/20 8:58 AM, Sebastian Andrzej Siewior wrote:
> >>> Dear RT folks!
> >>>
> >>> I'm pleased to announce the v5.4.17-rt9 patch set.
> >>
> >> Thanks!
> >> I see a continuous stream of these:
> >
> > (snips gripage)
> >
> > Yup, d67739268cf0 annoys RT locking if lockdep is enabled.  The below
> > shut it up for my i915 equipped lappy.
>
> Wow, Mike, thanks!, will try it out and report...
> (might take me a couple of days)

I suspect you could just change the IS_ENABLED line to exclude
CONFIG_PREEMPT_RT from the lockdep hack instead.  Replacing
local_irq_disable() with a local lock is the routine fix, but in this
case it more closely resembles putting a bandaid on a bandaid :)
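
A minimal sketch of that alternative (untested; assumes the RT config
symbol is visible there), the guard in intel_engine_pm.c would become:

	#if IS_ENABLED(CONFIG_LOCKDEP) && !IS_ENABLED(CONFIG_PREEMPT_RT)

so RT configs fall through to the #else branch (presumably no-op stubs)
of __timeline_mark_lock()/unlock() and need no irqs-off section at all.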

	-Mike



* Re: [ANNOUNCE] v5.4.17-rt9
From: Sebastian Andrzej Siewior @ 2020-02-12 10:20 UTC
  To: Mike Galbraith
  Cc: Fernando Lopez-Lezcano, Thomas Gleixner, LKML, linux-rt-users,
	Steven Rostedt

On 2020-02-07 07:11:06 [+0100], Mike Galbraith wrote:
> drm/i915/gt: use a LOCAL_IRQ_LOCK in __timeline_mark_lock()
> 
> Quoting drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe
> 
>     We use a fake timeline->mutex lock to reassure lockdep that the timeline
>     is always locked when emitting requests. However, the use inside
>     __engine_park() may be inside hardirq and so lockdep now complains about
>     the mixed irq-state of the nested lock. Disable irqs around the
>     lockdep tracking to keep it happy.
> 
> This lockdep appeasement breaks RT because we take sleeping locks between
> __timeline_mark_lock()/unlock().  Use a LOCAL_IRQ_LOCK instead.
> 
> Signed-off-by: Mike Galbraith <efault@gmx.de>

Applied, thank you.

Sebastian

