Subject: [PATCH] sched: idle: Support nohlt_list kernel parameter
From: zhenwei pi
Date: 2019-05-22  9:08 UTC
  To: mingo, peterz; +Cc: linux-kernel, pizhenwei

Currently the kernel only supports the hlt and nohlt parameters, so
all CPUs either poll or halt in idle. A guest OS cannot control power
management under KVM virtualization, so we can only choose between
high performance for every CPU (nohlt) and CPU overcommit (hlt).

The nohlt_list kernel parameter allows the specified CPU(s) to poll
in idle, while the other CPUs still halt.
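
For context, the existing hlt/nohlt handlers flip a single global flag
for every CPU. A minimal sketch of the pre-patch code (the
CONFIG_GENERIC_IDLE_POLL_SETUP handlers in kernel/sched/idle.c; the
flag itself lives outside that block):

    /* Pre-patch behaviour: one flag applies to all CPUs. */
    static int __read_mostly cpu_idle_force_poll;

    static int __init cpu_idle_poll_setup(char *__unused)
    {
            cpu_idle_force_poll = 1;  /* "nohlt": every CPU polls in idle */
            return 1;
    }
    __setup("nohlt", cpu_idle_poll_setup);

    static int __init cpu_idle_nopoll_setup(char *__unused)
    {
            cpu_idle_force_poll = 0;  /* "hlt": every CPU may halt in idle */
            return 1;
    }
    __setup("hlt", cpu_idle_nopoll_setup);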

We can configure the boot parameters in a guest (e.g. 16 vCPUs on x86)
like this:
    linux ... irqaffinity=0,2,4,6 nohlt_list=0,2,4,6
This means that 25% of the CPUs always stay in VM mode and benefit
from posted interrupts.
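
Because the value is parsed with cpulist_parse() (see the diff below),
the usual kernel cpulist range syntax should also be accepted, e.g.:

    linux ... nohlt_list=0-3,8

would select CPUs 0, 1, 2, 3 and 8 (range support assumed from
cpulist_parse(); the example above only shows a plain list).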

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 kernel/sched/idle.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 80940939b733..5a0c3498258b 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -50,6 +50,37 @@ static int __init cpu_idle_nopoll_setup(char *__unused)
 	return 1;
 }
 __setup("hlt", cpu_idle_nopoll_setup);
+
+static cpumask_var_t cpu_nohlt_cpumask __cpumask_var_read_mostly;
+static int __init cpu_idle_poll_list_setup(char *str)
+{
+	alloc_bootmem_cpumask_var(&cpu_nohlt_cpumask);
+	if (cpulist_parse(str, cpu_nohlt_cpumask)) {
+		pr_warn("idle: nohlt_list= incorrect CPU range\n");
+		cpumask_clear(cpu_nohlt_cpumask);
+	} else
+		pr_info("idle: nohlt_list=%s\n", str);
+
+	return 1;
+}
+__setup("nohlt_list=", cpu_idle_poll_list_setup);
+
+static inline bool cpu_idle_should_poll(void)
+{
+	int cpu;
+
+	if (cpu_idle_force_poll)
+		return !!cpu_idle_force_poll;
+
+	cpu = smp_processor_id();
+	return (cpumask_available(cpu_nohlt_cpumask) &&
+			!!cpumask_test_cpu(cpu, cpu_nohlt_cpumask));
+}
+#else
+static inline bool cpu_idle_should_poll(void)
+{
+	return !!cpu_idle_force_poll;
+}
 #endif
 
 static noinline int __cpuidle cpu_idle_poll(void)
@@ -60,7 +91,7 @@ static noinline int __cpuidle cpu_idle_poll(void)
 	stop_critical_timings();
 
 	while (!tif_need_resched() &&
-		(cpu_idle_force_poll || tick_check_broadcast_expired()))
+		(cpu_idle_should_poll() || tick_check_broadcast_expired()))
 		cpu_relax();
 	start_critical_timings();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
@@ -256,7 +287,7 @@ static void do_idle(void)
 		 * broadcast device expired for us, we don't want to go deep
 		 * idle as we know that the IPI is going to arrive right away.
 		 */
-		if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+		if (cpu_idle_should_poll() || tick_check_broadcast_expired()) {
 			tick_nohz_idle_restart_tick();
 			cpu_idle_poll();
 		} else {
-- 
2.11.0
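
With the patch applied, a quick sanity check at boot is to look for
the pr_info() line added above, e.g.:

    $ dmesg | grep nohlt_list
    idle: nohlt_list=0,2,4,6

(expected output inferred from the patch's pr_info() format string; an
invalid list triggers the "incorrect CPU range" warning instead).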

