From 51d6186b620a9e354a0d40af06aef1c1299ca223 Mon Sep 17 00:00:00 2001
From: Vikram Mulukutla <markivx@codeaurora.org>
Date: Thu, 27 Jul 2017 12:14:48 -0700
Subject: [PATCH] measure spinlock fairness across differently capable CPUs

How to run this test:
1) compile and boot
2) echo 1 > /sys/module/test/parameters/run_test
3) sleep 5
4) echo 0 > /sys/module/test/parameters/run_test

The test schedules two threads on two separate CPUs
and attempts to acquire the same spinlock from both
threads with a certain loop construct.
(it assumes cpu0 is 'Little' and cpu4 is 'Big'. This
can be changed in the code)

After running the test, check these counters:
cat /sys/module/test/parameters/big_time_us
cat /sys/module/test/parameters/little_time_us

If unfairness exists, little_time should be close to 1 second or
a fairly large millisecond value.
test.c has comments that explain why this is so.

Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org>
---
 kernel/sched/Makefile |   2 +-
 kernel/sched/test.c   | 204 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 kernel/sched/test.c

diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index f6cce95..542a1c7 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -15,7 +15,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 
-obj-y += core.o loadavg.o clock.o cputime.o
+obj-y += core.o loadavg.o clock.o cputime.o test.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
 obj-y += wait.o swait.o completion.o idle.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o sched_avg.o
diff --git a/kernel/sched/test.c b/kernel/sched/test.c
new file mode 100644
index 0000000..5dd3b0d
--- /dev/null
+++ b/kernel/sched/test.c
@@ -0,0 +1,204 @@
+/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Big.Little spinlock unfairness test by Vikram Mulukutla
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+
+static DEFINE_SPINLOCK(mylock);
+
+static int run_test;
+static int big_counter, little_counter;
+module_param(big_counter, int, 0644);
+module_param(little_counter, int, 0644);
+
+static int big_time_us, little_time_us;
+module_param(big_time_us, int, 0644);
+module_param(little_time_us, int, 0644);
+
+volatile int sync = 0;
+
+struct testdata {
+	int cpu;
+	int *counter;
+	int *time;
+};
+
+struct testdata littledata = {
+	.cpu = 0,
+	.counter = &little_counter,
+	.time = &little_time_us,
+};
+
+struct testdata bigdata = {
+	.cpu = 4,
+	.counter = &big_counter,
+	.time = &big_time_us,
+};
+
+/*
+ * This is the little CPU thread. It attempts to get the lock, disabling
+ * and enabling interrupts before and after the critical section, and
+ * checks how long it took to get the lock. Ideally this time
+ * should be reflective of just one iteration of the critical section of the
+ * big cpu thread but since this little CPU always loses the ticket strex,
+ * it ends up waiting a full second.
+ */
+static int __ref lockunlock(void *data)
+{
+	struct testdata *testdata = data;
+	u64 spin_start_time, spin_stop_time;
+
+	sched_setaffinity(current->pid, cpumask_of(testdata->cpu));
+
+	while (!sync);
+
+	local_irq_disable();
+	spin_lock(&mylock);
+	while (1) {
+		spin_unlock(&mylock);
+		local_irq_enable();
+
+		cpu_relax();
+
+		local_irq_disable();
+
+		spin_start_time = sched_clock();
+		spin_lock(&mylock);
+		spin_stop_time = sched_clock();
+
+		if (*testdata->time < ((spin_stop_time - spin_start_time)/NSEC_PER_USEC))
+			*testdata->time = (spin_stop_time - spin_start_time)/NSEC_PER_USEC;
+		*testdata->counter = *testdata->counter + 1;
+		if (!run_test)
+			break;
+	}
+	spin_unlock(&mylock);
+	local_irq_enable();
+
+	return 0;
+}
+
+/*
+ * This is the big CPU thread. Ideally it should contend and have its ticket
+ * go in after the little CPU. However what ends up happening is
+ * - if it's running fast enough - this big CPU keeps winning the strex every
+ * time and is able to obtain and release the lock for a full second before it
+ * takes a break, at which point the little CPU wins (finally).
+ *
+ * Note that if there exists a loop construct such as this one in the kernel,
+ * and there is no 'break', then we have a potential livelock situation if
+ * this big CPU needs the little CPU to obtain the lock for the former to
+ * break out of the loop.
+ */
+static int __ref lockunlock_noirq(void *data)
+{
+	struct testdata *testdata = data;
+	u64 spin_start_time, spin_stop_time, time_since_start, test_start_time;
+	bool rollover;
+
+	sched_setaffinity(current->pid, cpumask_of(testdata->cpu));
+
+	while (!sync);
+
+	local_irq_disable();
+	spin_lock(&mylock);
+	test_start_time = sched_clock();
+	while (1) {
+		spin_unlock(&mylock);
+
+		time_since_start = sched_clock() - test_start_time;
+		rollover = time_since_start >= NSEC_PER_SEC;
+
+		/*
+		 * We take a break after 1 second to allow watchdog etc. and
+		 * potentially a user shell to work!
+		 */
+		if (rollover) {
+			local_irq_enable();
+			test_start_time = sched_clock();
+		}
+
+		/*
+		 * Changing this cpu_relax to a udelay(1) will likely
+		 * allow the little CPU enough cycles so it finally wins
+		 * the strex battle.
+		 */
+		cpu_relax();
+
+		if (rollover)
+			local_irq_disable();
+
+		spin_start_time = sched_clock();
+		spin_lock(&mylock);
+		spin_stop_time = sched_clock();
+
+		if (*testdata->time < ((spin_stop_time - spin_start_time)/NSEC_PER_USEC))
+			*testdata->time = (spin_stop_time - spin_start_time)/NSEC_PER_USEC;
+		*testdata->counter = *testdata->counter + 1;
+		if (!run_test)
+			break;
+	}
+	spin_unlock(&mylock);
+	local_irq_enable();
+
+	return 0;
+}
+
+static int runtest_param_set(const char *val, struct kernel_param *kp)
+{
+	int ret;
+	int old_val = run_test;
+
+	ret = param_set_int(val, kp);
+
+	if (ret)
+		return ret;
+
+	/* If run_test is not zero or one, ignore. */
+	if (run_test >> 1) {
+		run_test = old_val;
+		return -EINVAL;
+	}
+
+	if (run_test) {
+		sync = 0;
+
+		*littledata.time = 0;
+		*littledata.counter = 0;
+		*bigdata.time = 0;
+		*bigdata.counter = 0;
+
+		kthread_run(lockunlock, (void *) &littledata,
+						"test/%d", littledata.cpu);
+		kthread_run(lockunlock_noirq, (void *) &bigdata,
+						"test/%d", bigdata.cpu);
+		sync=1;
+	}
+
+	return 0;
+}
+
+module_param_call(run_test, runtest_param_set, param_get_int,
+		  &run_test, S_IRUGO|S_IWUSR);
+
+
+
-- 
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project