diff --git a/ipc/sem.c b/ipc/sem.c
index bf534c74293e..6026187f79f8 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -87,6 +87,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include "util.h"
@@ -336,20 +337,43 @@ static void complexmode_enter(struct sem_array *sma)
 	int i;
 	struct sem *sem;
 
+	/* caller owns sem_perm.lock -> plain C access */
 	if (sma->use_global_lock > 0) {
 		/*
 		 * We are already in global lock mode.
 		 * Nothing to do, just reset the
 		 * counter until we return to simple mode.
 		 */
+		/* a change from a non-zero value to another
+		 * non-zero value. Plain C is sufficient, as all
+		 * readers either own sem_perm.lock or are using
+		 * data_race() or smp_load_acquire().
+		 */
 		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 		return;
 	}
+	/* Question: This pairs with the smp_load_acquire
+	 * in sem_lock(), in a racy way:
+	 * The reader in sem_lock() may see the new value
+	 * immediately, ...
+	 */
 	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 
 	for (i = 0; i < sma->sem_nsems; i++) {
 		sem = &sma->sems[i];
 		spin_lock(&sem->lock);
+		/* ..., or much later.
+		 * But this is the latest possible time:
+		 * sem_lock() owns one of the sem->lock locks
+		 * when using smp_load_acquire(). Thus one of the
+		 * spin_unlock()s in this loop is the _release for
+		 * the plain C write above.
+		 * My current understanding: Plain C is correct,
+		 * as the reader is either using data_race() or
+		 * smp_load_acquire(), or it is the trivial case
+		 * where the reader owns sem_perm.lock - and we
+		 * own that lock the whole time.
+		 */
 		spin_unlock(&sem->lock);
 	}
 }
@@ -366,11 +390,21 @@ static void complexmode_tryleave(struct sem_array *sma)
 		 */
 		return;
 	}
+	/* sem_perm.lock owned, and all writes to sma->use_global_lock
+	 * happen under that lock -> plain C
+	 */
 	if (sma->use_global_lock == 1) {
 
 		/* See SEM_BARRIER_1 for purpose/pairing */
 		smp_store_release(&sma->use_global_lock, 0);
 	} else {
+		/* the read side is marked -> plain C.
+		 * Question: Old value 4, new value 3.
+		 * If it could happen that the actual
+		 * change is 4 -> 0 -> 3 (i.e. first
+		 * clear bit 2, then set bits 0&1),
+		 * then this would break the algorithm.
+		 * Is WRITE_ONCE() therefore required? */
 		sma->use_global_lock--;
 	}
 }
@@ -412,7 +446,20 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	 * Initial check for use_global_lock. Just an optimization,
 	 * no locking, no memory barrier.
 	 */
-	if (!sma->use_global_lock) {
+#if 1
+	/* the code works fine regardless of the returned value
+	 * -> data_race().
+	 */
+	if (!data_race(sma->use_global_lock)) {
+#else
+	/* proof of the claim that the code always works:
+	 * My benchmarks ran fine with this implementation :-)
+	 */
+	if (jiffies % 2) {
+		pr_info("jiffies mod 2 is 1.\n");
+	} else {
+		pr_info("jiffies mod 2 is 0.\n");
+#endif
 		/*
 		 * It appears that no complex operation is around.
 		 * Acquire the per-semaphore lock.
@@ -420,6 +467,11 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 		spin_lock(&sem->lock);
 
 		/* see SEM_BARRIER_1 for purpose/pairing */
+		/* sma->use_global_lock is written to with plain C
+		 * within a spinlock protected region (but: another
+		 * lock, not the sem->lock that we own). No need
+		 * for data_race(), as we use smp_load_acquire().
+		 */
 		if (!smp_load_acquire(&sma->use_global_lock)) {
 			/* fast path successful! */
 			return sops->sem_num;
@@ -430,6 +482,10 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	/* slow path: acquire the full lock */
 	ipc_lock_object(&sma->sem_perm);
 
+	/* Trivial case: All writes to sma->use_global_lock happen under
+	 * sma->sem_perm.lock. We own that lock, thus plain C access is
+	 * correct.
+	 */
 	if (sma->use_global_lock == 0) {
 		/*
 		 * The use_global_lock mode ended while we waited for
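
Not part of the patch: to make the acquire/release argument in the comments
above easier to follow, here is a minimal userspace sketch of the same
handoff. It models sem_perm.lock and the per-semaphore sem->lock with
pthread mutexes and uses C11 atomics in place of data_race(),
smp_load_acquire() and the plain C write; all identifiers (sem_model,
enter_complex_mode, try_fast_path, NSEMS, HYSTERESIS) are invented for the
sketch and do not exist in ipc/sem.c.

/*
 * Illustrative model only, not kernel code.
 * Build with: gcc -std=c11 -pthread model.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NSEMS		4
#define HYSTERESIS	10	/* stands in for USE_GLOBAL_LOCK_HYSTERESIS */

struct sem_model {
	pthread_mutex_t global_lock;		/* models sma->sem_perm.lock */
	pthread_mutex_t sem_lock[NSEMS];	/* models sma->sems[i].lock */
	atomic_int use_global_lock;		/* models sma->use_global_lock */
};

/* Writer, models complexmode_enter(); caller must hold global_lock. */
static void enter_complex_mode(struct sem_model *s)
{
	/* relaxed store models the plain C write done under sem_perm.lock */
	atomic_store_explicit(&s->use_global_lock, HYSTERESIS,
			      memory_order_relaxed);

	for (int i = 0; i < NSEMS; i++) {
		pthread_mutex_lock(&s->sem_lock[i]);
		/* this unlock is the release that publishes the store to
		 * any reader that holds (or later takes) sem_lock[i] */
		pthread_mutex_unlock(&s->sem_lock[i]);
	}
}

/* Reader, models the sem_lock() fast path. Returns 1 with
 * sem_lock[semnum] held, or 0 if the caller must fall back to
 * global_lock.
 */
static int try_fast_path(struct sem_model *s, int semnum)
{
	/* racy first look, models data_race(sma->use_global_lock) */
	if (atomic_load_explicit(&s->use_global_lock, memory_order_relaxed))
		return 0;

	pthread_mutex_lock(&s->sem_lock[semnum]);

	/* models smp_load_acquire(); ordered against the writer by the
	 * lock/unlock of sem_lock[semnum] */
	if (!atomic_load_explicit(&s->use_global_lock, memory_order_acquire))
		return 1;	/* fast path successful */

	pthread_mutex_unlock(&s->sem_lock[semnum]);
	return 0;
}

int main(void)
{
	struct sem_model s;

	pthread_mutex_init(&s.global_lock, NULL);
	for (int i = 0; i < NSEMS; i++)
		pthread_mutex_init(&s.sem_lock[i], NULL);
	atomic_init(&s.use_global_lock, 0);

	printf("fast path taken: %d\n", try_fast_path(&s, 0));	/* 1 */
	pthread_mutex_unlock(&s.sem_lock[0]);

	pthread_mutex_lock(&s.global_lock);
	enter_complex_mode(&s);
	pthread_mutex_unlock(&s.global_lock);

	printf("fast path taken: %d\n", try_fast_path(&s, 0));	/* 0 */
	return 0;
}

The property being modeled is the one the in-loop comment claims: because
the writer takes and drops every sem_lock[i] after its relaxed store, a
reader that already holds one of those locks when it performs the acquire
load cannot observe the old value any later than that unlock, so the
writer's lock sweep bounds how stale the reader's view can be.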