linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
@ 2017-06-05  9:07 Tao Wang
  2017-06-05  9:07 ` [PATCH RFC 2/2] thermal/cpu idle cooling: cpu idle cooling cooperate with cpu cooling Tao Wang
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Tao Wang @ 2017-06-05  9:07 UTC (permalink / raw)
  To: rui.zhang, edubezval, amit.kachhap, viresh.kumar, javi.merino
  Cc: linux-kernel, linux-pm, sunzhaosheng, vincent.guittot,
	jean.wangtao, Tao Wang

The cpu idle cooling driver performs synchronized idle injection across
all cpus in the same cluster, offering a new way to cool down the cpus.
It is similar to the intel_powerclamp driver, but is designed primarily
for ARM platforms.
Each cluster has its own idle cooling device and each core has its own
idle injection thread, which uses play_idle() to enter idle. In order to
reach the deepest idle state, the idle periods of all cores are aligned
on jiffies. The injected idle ratio can be controlled through the cooling
device interface.

Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
---
 drivers/thermal/Kconfig            |   13 +
 drivers/thermal/Makefile           |    3 +
 drivers/thermal/cpu_idle_cooling.c |  648 ++++++++++++++++++++++++++++++++++++
 3 files changed, 664 insertions(+)
 create mode 100644 drivers/thermal/cpu_idle_cooling.c

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index b5b5fac..f78e85c 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -154,6 +154,19 @@ config CPU_THERMAL
 
 	  If you want this support, you should say Y here.
 
+config CPU_IDLE_THERMAL
+	tristate "generic cpu idle cooling support"
+	depends on CPU_FREQ
+	help
+	  This implements the generic cpu cooling mechanism through idle
+	  injection.
+
+	  This throttles the cpus by injecting a specified amount of idle
+	  time in a fixed cycle. All cpus in the same cluster enter idle
+	  synchronously so that the deepest idle state can be reached.
+
+	  If you want this support, you should say Y here.
+
 config CLOCK_THERMAL
 	bool "Generic clock cooling support"
 	depends on COMMON_CLK
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 094d703..a4db66e 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -26,6 +26,9 @@ thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
 # devfreq cooling
 thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
 
+# cpu idle cooling
+obj-$(CONFIG_CPU_IDLE_THERMAL)	+= cpu_idle_cooling.o
+
 # platform thermal drivers
 obj-y				+= broadcom/
 obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)	+= qcom-spmi-temp-alarm.o
diff --git a/drivers/thermal/cpu_idle_cooling.c b/drivers/thermal/cpu_idle_cooling.c
new file mode 100644
index 0000000..89a15c5
--- /dev/null
+++ b/drivers/thermal/cpu_idle_cooling.c
@@ -0,0 +1,648 @@
+/*
+ *  linux/drivers/thermal/cpu_idle_cooling.c
+ *
+ *  Copyright (C) 2017  Tao Wang <kevin.wangtao@hisilicon.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/cpu.h>
+#include <linux/topology.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/cpuidle.h>
+#include <linux/thermal.h>
+#include <linux/sched.h>
+#include <uapi/linux/sched/types.h>
+#include <linux/slab.h>
+#include <linux/tick.h>
+#include <linux/wait.h>
+#include <linux/sched/rt.h>
+
+#define MAX_TARGET_RATIO		(50U)
+
+#define DEFAULT_WINDOW_SIZE		(1)
+#define DEFAULT_DURATION_JIFFIES	(20)
+
+struct cpu_idle_cooling_device {
+	int id;
+	struct thermal_cooling_device *cooling_dev;
+	wait_queue_head_t wait_queue;
+
+	/* The cpu assigned to collect stat and update
+	 * control parameters. default to BSP but BSP
+	 * can be offlined.
+	 */
+	unsigned long control_cpu;
+
+	unsigned int set_target_ratio;
+	unsigned int current_ratio;
+	unsigned int control_ratio;
+	unsigned int duration;
+	unsigned int window_size;
+
+	cpumask_var_t related_cpus;
+	cpumask_var_t injected_cpus;
+	struct list_head node;
+	bool should_skip;
+	bool clamping;
+};
+
+static LIST_HEAD(cpu_idle_cooling_dev_list);
+static DEFINE_PER_CPU(struct task_struct *, idle_injection_thread_ptr);
+static DEFINE_MUTEX(cpu_idle_cooling_lock);
+
+static u64 idle_time[NR_CPUS];	/* idle time at last update_stats() */
+static u64 time_stamp[NR_CPUS];	/* 'now' value at last update_stats() */
+static enum cpuhp_state hp_state;
+
+#define STORE_PARAM(param, min, max)			\
+static ssize_t store_##param(struct device *dev,	\
+	struct device_attribute *attr,			\
+	const char *buf, size_t count)			\
+{									\
+	unsigned int new_value;						\
+	struct thermal_cooling_device *cdev;				\
+	struct cpu_idle_cooling_device *idle_cooling_dev;		\
+									\
+	if (dev == NULL || attr == NULL)				\
+		return 0;						\
+									\
+	if (kstrtouint(buf, 10, &new_value))				\
+		return -EINVAL;						\
+									\
+	if (new_value > max || new_value < min) {			\
+		pr_err("Out of range %u, between %d-%d\n",		\
+			new_value, min, max);				\
+		return -EINVAL;						\
+	}								\
+									\
+	cdev = container_of(dev, struct thermal_cooling_device, device);\
+	idle_cooling_dev = cdev->devdata;				\
+	idle_cooling_dev->param = new_value;				\
+									\
+	/* make new value visible to other cpus */			\
+	smp_mb();							\
+									\
+	return count;							\
+}
+
+STORE_PARAM(duration, 10, 500);
+STORE_PARAM(window_size, 1, 10);
+
+#define SHOW_PARAM(param)				\
+static ssize_t show_##param(struct device *dev,		\
+	struct device_attribute *attr, char *buf)	\
+{									\
+	struct thermal_cooling_device *cdev;				\
+	struct cpu_idle_cooling_device *idle_cooling_dev;		\
+									\
+	if (dev == NULL || attr == NULL)				\
+		return 0;						\
+									\
+	cdev = container_of(dev, struct thermal_cooling_device, device);\
+	idle_cooling_dev = cdev->devdata;				\
+									\
+	return snprintf(buf, 12UL, "%d\n",				\
+					idle_cooling_dev->param);	\
+}
+
+SHOW_PARAM(duration);
+SHOW_PARAM(window_size);
+
+static DEVICE_ATTR(duration, 0644, show_duration, store_duration);
+static DEVICE_ATTR(window_size, 0644, show_window_size, store_window_size);
+
+static struct cpu_idle_cooling_device *
+get_cpu_idle_cooling_dev(unsigned long cpu)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev;
+
+	list_for_each_entry(idle_cooling_dev,
+		&cpu_idle_cooling_dev_list, node) {
+		if (cpumask_test_cpu(cpu, idle_cooling_dev->related_cpus))
+			return idle_cooling_dev;
+	}
+
+	return NULL;
+}
+
+#define K_P		10
+#define MAX_COMP	10
+static unsigned int get_compensation(unsigned int current_ratio,
+		unsigned int target_ratio, unsigned int control_ratio)
+{
+	unsigned int comp;
+
+	comp = abs(current_ratio - target_ratio) * K_P / 10;
+	if (comp > MAX_COMP)
+		comp = MAX_COMP;
+
+	if (current_ratio > target_ratio) {
+		if (control_ratio > comp)
+			comp = control_ratio - comp;
+		else
+			comp = 1;
+	} else {
+		if (control_ratio + comp < MAX_TARGET_RATIO)
+			comp = control_ratio + comp;
+		else
+			comp = MAX_TARGET_RATIO;
+
+		if (comp > (target_ratio * 6 / 5))
+			comp = target_ratio * 6 / 5;
+	}
+
+	return comp;
+}
+
+static void update_stats(struct cpu_idle_cooling_device *idle_cooling_dev)
+{
+	unsigned long cpu;
+	u64 now, now_idle, delta_time, delta_idle;
+	u64 min_idle_ratio = 100;
+	u64 idle_ratio = 0;
+
+	for_each_cpu(cpu, idle_cooling_dev->related_cpus) {
+		now_idle = get_cpu_idle_time(cpu, &now, 0);
+		delta_idle = now_idle - idle_time[cpu];
+		delta_time = now - time_stamp[cpu];
+		idle_time[cpu] = now_idle;
+		time_stamp[cpu] = now;
+
+		if (delta_idle >= delta_time || !cpu_online(cpu))
+			now_idle = 100;
+		else if (delta_time)
+			now_idle = div64_u64(100 * delta_idle, delta_time);
+		else
+			return;
+
+		if (now_idle < min_idle_ratio)
+			min_idle_ratio = now_idle;
+
+		idle_ratio += now_idle;
+	}
+
+	idle_ratio /= cpumask_weight(idle_cooling_dev->related_cpus);
+	if (idle_ratio > MAX_TARGET_RATIO)
+		idle_ratio = min_idle_ratio;
+
+	if (idle_cooling_dev->should_skip)
+		idle_ratio = (idle_cooling_dev->current_ratio + idle_ratio) / 2;
+
+	idle_cooling_dev->current_ratio = (unsigned int)idle_ratio;
+	idle_cooling_dev->control_ratio = get_compensation(idle_ratio,
+				idle_cooling_dev->set_target_ratio,
+				idle_cooling_dev->control_ratio);
+	idle_cooling_dev->should_skip =
+			(idle_ratio > (2 * idle_cooling_dev->set_target_ratio));
+	/* make new control_ratio and should skip flag visible to other cpus */
+	smp_mb();
+}
+
+static void inject_idle_fn(struct cpu_idle_cooling_device *idle_cooling_dev)
+{
+	long sleeptime, guard;
+	unsigned int interval_ms; /* injected idle time, in ms */
+	unsigned long target_jiffies;
+	unsigned int duration_ms = idle_cooling_dev->duration;
+	unsigned long duration_jiffies = msecs_to_jiffies(duration_ms);
+
+	guard = DIV_ROUND_UP(duration_jiffies * (90 - MAX_TARGET_RATIO), 100);
+
+	/* align the idle period to a multiple of duration_jiffies */
+	target_jiffies = roundup(jiffies, duration_jiffies);
+	sleeptime = target_jiffies - jiffies;
+	if (sleeptime < guard)
+		sleeptime += duration_jiffies;
+
+	if (sleeptime > 0)
+		schedule_timeout_interruptible(sleeptime);
+
+	interval_ms = duration_ms * idle_cooling_dev->control_ratio / 100;
+
+	if (idle_cooling_dev->should_skip)
+		return;
+
+	if (interval_ms)
+		play_idle(interval_ms);
+}
+
+static int idle_injection_thread(void *arg)
+{
+	unsigned long cpunr = (unsigned long)arg;
+	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 };
+	unsigned int count = 0;
+	struct cpu_idle_cooling_device *idle_cooling_dev;
+
+	set_freezable();
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+
+	mutex_lock(&cpu_idle_cooling_lock);
+	idle_cooling_dev = get_cpu_idle_cooling_dev(cpunr);
+	mutex_unlock(&cpu_idle_cooling_lock);
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(idle_cooling_dev->wait_queue,
+			(idle_cooling_dev->clamping && cpu_online(cpunr)) ||
+			kthread_should_stop());
+
+		if (kthread_should_stop())
+			break;
+
+		/* rebind thread to cpu */
+		if (set_cpus_allowed_ptr(current, cpumask_of(cpunr)))
+			continue;
+
+		try_to_freeze();
+
+		while (idle_cooling_dev->clamping &&
+			cpu_online(cpunr)) {
+			try_to_freeze();
+
+			count++;
+			/*
+			 * only elected controlling cpu can collect stats
+			 * and update control parameters.
+			 */
+			if (cpunr == idle_cooling_dev->control_cpu
+				&& !(count % idle_cooling_dev->window_size))
+				update_stats(idle_cooling_dev);
+
+			inject_idle_fn(idle_cooling_dev);
+		}
+	}
+
+	return 0;
+}
+
+static int create_idle_thread(struct cpu_idle_cooling_device *idle_cooling_dev)
+{
+	unsigned long cpu;
+	struct task_struct *thread;
+
+	init_waitqueue_head(&idle_cooling_dev->wait_queue);
+
+	/* start one thread for each cpu in related_cpus */
+	for_each_cpu(cpu, idle_cooling_dev->related_cpus) {
+		thread = kthread_create_on_node(idle_injection_thread,
+						(void *) cpu,
+						cpu_to_node(cpu),
+						"kidle_inject/%lu", cpu);
+		/* record the cpu as injected, bind the thread to it and start it */
+		if (likely(!IS_ERR(thread))) {
+			cpumask_set_cpu(cpu, idle_cooling_dev->injected_cpus);
+			kthread_bind(thread, cpu);
+			wake_up_process(thread);
+			per_cpu(idle_injection_thread_ptr, cpu) = thread;
+		} else {
+			return -ENOMEM;	/* threads started so far keep running */
+		}
+	}
+
+	return 0;
+}
+
+static void stop_idle_thread(struct cpu_idle_cooling_device *idle_cooling_dev)
+{
+	unsigned long cpu;
+	struct task_struct **percpu_thread;
+
+	idle_cooling_dev->clamping = false;
+	/*
+	 * make clamping visible to other cpus and give per cpu threads
+	 * sometime to exit, or gets killed later.
+	 */
+	smp_mb();
+	msleep(idle_cooling_dev->duration);
+	for_each_cpu(cpu, idle_cooling_dev->injected_cpus) {
+		pr_debug("idle inject thread for cpu %lu alive, kill\n", cpu);
+		percpu_thread = per_cpu_ptr(&idle_injection_thread_ptr, cpu);
+		if (!IS_ERR_OR_NULL(*percpu_thread)) {
+			kthread_stop(*percpu_thread);
+			*percpu_thread = NULL;
+		}
+		cpumask_clear_cpu(cpu, idle_cooling_dev->injected_cpus);
+	}
+}
+
+static int idle_injection_cpu_online(unsigned int cpu)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev;
+
+	idle_cooling_dev = get_cpu_idle_cooling_dev(cpu);
+	if (idle_cooling_dev) {
+		/* prefer BSP as controlling CPU */
+		if (cpu == cpumask_first(idle_cooling_dev->injected_cpus)
+			|| !cpu_online(idle_cooling_dev->control_cpu)) {
+			idle_cooling_dev->control_cpu = cpu;
+			/* make new control_cpu visible to other cpus */
+			smp_mb();
+		}
+		wake_up_interruptible(&idle_cooling_dev->wait_queue);
+	}
+
+	return 0;
+}
+
+static int idle_injection_cpu_predown(unsigned int cpu)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev;
+
+	idle_cooling_dev = get_cpu_idle_cooling_dev(cpu);
+	if (idle_cooling_dev) {
+		if (cpu == idle_cooling_dev->control_cpu) {
+			cpu = cpumask_next_and(-1,
+				idle_cooling_dev->injected_cpus,
+				cpu_online_mask);
+
+			if (cpu < nr_cpu_ids)
+				idle_cooling_dev->control_cpu = cpu;
+			/* make new control_cpu visible to other cpus */
+			smp_mb();
+		}
+	}
+
+	return 0;
+}
+
+static int idle_get_max_state(struct thermal_cooling_device *cdev,
+				 unsigned long *state)
+{
+	*state = MAX_TARGET_RATIO;
+
+	return 0;
+}
+
+static int idle_get_cur_state(struct thermal_cooling_device *cdev,
+				 unsigned long *state)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev = cdev->devdata;
+
+	if (true == idle_cooling_dev->clamping)
+		*state = (unsigned long)idle_cooling_dev->current_ratio;
+	else
+		*state = 0; /* indicates invalid state */
+
+	return 0;
+}
+
+static int idle_set_cur_state(struct thermal_cooling_device *cdev,
+				 unsigned long new_target_ratio)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev = cdev->devdata;
+	static DEFINE_MUTEX(idle_state_lock);	/* serializes ratio updates */
+
+	/* not cdev->lock: thermal_cdev_update() calls us with it held */
+	mutex_lock(&idle_state_lock);
+	new_target_ratio = clamp(new_target_ratio, 0UL,
+				(unsigned long) MAX_TARGET_RATIO);
+	if (idle_cooling_dev->set_target_ratio == 0
+		&& new_target_ratio > 0) {
+		idle_cooling_dev->set_target_ratio =
+			(unsigned int) new_target_ratio;
+		idle_cooling_dev->control_ratio =
+			idle_cooling_dev->set_target_ratio;
+		idle_cooling_dev->current_ratio =
+			idle_cooling_dev->set_target_ratio;
+		idle_cooling_dev->clamping = true;
+		wake_up_interruptible(&idle_cooling_dev->wait_queue);
+	} else if (idle_cooling_dev->set_target_ratio > 0) {
+		if (new_target_ratio == 0) {
+			idle_cooling_dev->set_target_ratio = 0;
+			idle_cooling_dev->clamping = false;
+			/* make clamping visible to other cpus */
+			smp_mb();
+		} else {	/* injection already running: retarget it */
+			idle_cooling_dev->set_target_ratio =
+				(unsigned int) new_target_ratio;
+			/* make new set_target_ratio visible to other cpus */
+			smp_mb();
+		}
+	}
+
+	mutex_unlock(&idle_state_lock);
+
+	return 0;
+}
+
+static struct thermal_cooling_device_ops cpu_idle_injection_cooling_ops = {
+	.get_max_state = idle_get_max_state,
+	.get_cur_state = idle_get_cur_state,
+	.set_cur_state = idle_set_cur_state,
+};
+
+unsigned long get_max_idle_state(const struct cpumask *clip_cpus)
+{
+	return MAX_TARGET_RATIO;
+}
+EXPORT_SYMBOL_GPL(get_max_idle_state);
+
+void set_idle_state(const struct cpumask *clip_cpus, unsigned long idle_ratio)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev;
+
+	mutex_lock(&cpu_idle_cooling_lock);
+	list_for_each_entry(idle_cooling_dev,
+		&cpu_idle_cooling_dev_list, node) {
+		if (cpumask_subset(idle_cooling_dev->related_cpus, clip_cpus))
+			idle_set_cur_state(idle_cooling_dev->cooling_dev,
+					idle_ratio);
+	}
+	mutex_unlock(&cpu_idle_cooling_lock);
+}
+EXPORT_SYMBOL_GPL(set_idle_state);
+
+struct thermal_cooling_device * __init
+cpu_idle_cooling_register(const struct cpumask *clip_cpus)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev;
+	struct thermal_cooling_device *ret;
+	unsigned long cpu;
+	char dev_name[THERMAL_NAME_LENGTH];
+
+	if (cpumask_empty(clip_cpus))
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&cpu_idle_cooling_lock);
+	get_online_cpus();
+	list_for_each_entry(idle_cooling_dev,
+		&cpu_idle_cooling_dev_list, node) {
+		if (cpumask_intersects(idle_cooling_dev->related_cpus,
+			clip_cpus)) {
+			ret = ERR_PTR(-EINVAL);
+			goto exit_unlock;
+		}
+	}
+
+	idle_cooling_dev = kzalloc(sizeof(*idle_cooling_dev), GFP_KERNEL);
+	if (!idle_cooling_dev) {
+		ret = ERR_PTR(-ENOMEM);
+		goto exit_unlock;
+	}
+
+	if (!zalloc_cpumask_var(&idle_cooling_dev->related_cpus, GFP_KERNEL)) {
+		ret = ERR_PTR(-ENOMEM);
+		goto exit_free_dev;
+	}
+
+	if (!zalloc_cpumask_var(&idle_cooling_dev->injected_cpus, GFP_KERNEL)) {
+		ret = ERR_PTR(-ENOMEM);
+		goto exit_free_related_cpus;
+	}
+
+	cpumask_copy(idle_cooling_dev->related_cpus, clip_cpus);
+	cpu = cpumask_first(clip_cpus);
+	idle_cooling_dev->control_cpu = cpu;
+	idle_cooling_dev->id = topology_physical_package_id(cpu);
+	idle_cooling_dev->window_size = DEFAULT_WINDOW_SIZE;
+	idle_cooling_dev->duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
+
+	if (create_idle_thread(idle_cooling_dev)) {
+		ret = ERR_PTR(-ENOMEM);
+		goto exit_free_injected_cpus;
+	}
+
+	snprintf(dev_name, sizeof(dev_name), "thermal-cpuidle-%d",
+		 idle_cooling_dev->id);
+	ret = thermal_cooling_device_register(dev_name,
+					idle_cooling_dev,
+					&cpu_idle_injection_cooling_ops);
+	if (IS_ERR(ret))
+		goto exit_stop_thread;
+
+	idle_cooling_dev->cooling_dev = ret;
+
+	if (device_create_file(&idle_cooling_dev->cooling_dev->device,
+		&dev_attr_duration)) {
+		ret = ERR_PTR(-ENOMEM);
+		goto exit_unregister_cdev;
+	}
+
+	if (device_create_file(&idle_cooling_dev->cooling_dev->device,
+		&dev_attr_window_size)) {
+		ret = ERR_PTR(-ENOMEM);
+		goto exit_remove_duration_attr;
+	}
+
+	list_add(&idle_cooling_dev->node, &cpu_idle_cooling_dev_list);
+
+	goto exit_unlock;
+
+exit_remove_duration_attr:
+	device_remove_file(&idle_cooling_dev->cooling_dev->device,
+			&dev_attr_duration);
+exit_unregister_cdev:
+	thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
+exit_stop_thread:
+	stop_idle_thread(idle_cooling_dev);
+exit_free_injected_cpus:
+	free_cpumask_var(idle_cooling_dev->injected_cpus);
+exit_free_related_cpus:
+	free_cpumask_var(idle_cooling_dev->related_cpus);
+exit_free_dev:
+	kfree(idle_cooling_dev);
+exit_unlock:
+	put_online_cpus();
+	mutex_unlock(&cpu_idle_cooling_lock);
+	return ret;
+}
+
+void cpu_idle_cooling_unregister(struct thermal_cooling_device *cdev)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev;
+
+	if (IS_ERR_OR_NULL(cdev))
+		return;
+
+	idle_cooling_dev = cdev->devdata;
+
+	mutex_lock(&cpu_idle_cooling_lock);
+	get_online_cpus();
+	list_del(&idle_cooling_dev->node);
+	put_online_cpus();
+	mutex_unlock(&cpu_idle_cooling_lock);
+
+	device_remove_file(&cdev->device, &dev_attr_window_size);
+	device_remove_file(&cdev->device, &dev_attr_duration);
+	thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
+
+	stop_idle_thread(idle_cooling_dev);
+	free_cpumask_var(idle_cooling_dev->injected_cpus);
+	free_cpumask_var(idle_cooling_dev->related_cpus);
+	kfree(idle_cooling_dev);
+}
+
+static void __cpu_idle_cooling_exit(void)
+{
+	struct cpu_idle_cooling_device *idle_cooling_dev;
+
+	while (!list_empty(&cpu_idle_cooling_dev_list)) {
+		idle_cooling_dev = list_first_entry(&cpu_idle_cooling_dev_list,
+				struct cpu_idle_cooling_device, node);
+		cpu_idle_cooling_unregister(idle_cooling_dev->cooling_dev);
+	}
+
+	if (hp_state > 0)
+		cpuhp_remove_state_nocalls(hp_state);
+}
+
+static int __init cpu_idle_cooling_init(void)
+{
+	struct thermal_cooling_device *ret;
+	cpumask_t rest_cpu_mask;
+	const struct cpumask *register_cpu_mask;
+
+	hp_state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+			"thermal/cpu_idle_cooling:online",
+			idle_injection_cpu_online,
+			idle_injection_cpu_predown);
+	if (hp_state < 0)
+		return hp_state;
+
+	/* walk possible cpus only; CPU_MASK_ALL also has nonexistent ids */
+	cpumask_copy(&rest_cpu_mask, cpu_possible_mask);
+	do {
+		register_cpu_mask =
+			topology_core_cpumask(cpumask_first(&rest_cpu_mask));
+		if (cpumask_empty(register_cpu_mask))
+			break;
+		ret = cpu_idle_cooling_register(register_cpu_mask);
+		if (IS_ERR(ret)) {
+			__cpu_idle_cooling_exit();
+			return PTR_ERR(ret);	/* keep the real error code */
+		}
+	} while (cpumask_andnot(&rest_cpu_mask,
+				&rest_cpu_mask,
+				register_cpu_mask));
+
+	return 0;
+}
+module_init(cpu_idle_cooling_init);
+
+static void __exit cpu_idle_cooling_exit(void)
+{
+	__cpu_idle_cooling_exit();
+}
+module_exit(cpu_idle_cooling_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tao Wang <kevin.wangtao@hisilicon.com>");
+MODULE_DESCRIPTION("CPU Idle Cooling Driver for ARM Platform");
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH RFC 2/2] thermal/cpu idle cooling: cpu idle cooling cooperate with cpu cooling
  2017-06-05  9:07 [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Tao Wang
@ 2017-06-05  9:07 ` Tao Wang
  2017-06-06  3:41 ` [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Viresh Kumar
  2017-06-08  7:19 ` Vincent Guittot
  2 siblings, 0 replies; 13+ messages in thread
From: Tao Wang @ 2017-06-05  9:07 UTC (permalink / raw)
  To: rui.zhang, edubezval, amit.kachhap, viresh.kumar, javi.merino
  Cc: linux-kernel, linux-pm, sunzhaosheng, vincent.guittot,
	jean.wangtao, Tao Wang

This implements precise cpu thermal control through cooperation
between cpu idle cooling and cpu cooling: the frequency is not
decreased when idle injection alone can meet the target power limit.
This can give a smoother temperature curve and a performance
improvement in cases where there are big power gaps between cpu
OPPs.

Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
---
 drivers/thermal/Kconfig            |   17 ++++++++++++++++
 drivers/thermal/cpu_cooling.c      |   31 +++++++++++++++++++++++++++++
 drivers/thermal/cpu_idle_cooling.c |    5 +++++
 include/linux/cpu_idle_cooling.h   |   38 ++++++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+)
 create mode 100644 include/linux/cpu_idle_cooling.h

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index f78e85c..ef43d15 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -167,6 +167,23 @@ config CPU_IDLE_THERMAL
 
 	  If you want this support, you should say Y here.
 
+config CPU_THERMAL_COMBO
+	bool "precise cpu cooling support"
+	depends on CPU_THERMAL
+	depends on CPU_IDLE_THERMAL
+	help
+	  This implements precise cpu thermal control through the cooperation
+	  between idle cooling and cpu cooling.
+
+	  This prevents cpu cooling from scaling down the cpu frequency when
+	  idle injection alone can meet the power budget.
+
+	  This can give a smoother temperature curve and a performance
+	  improvement in cases where there are big power gaps between cpu
+	  OPPs.
+
+	  If you want this support, you should say Y here.
+
 config CLOCK_THERMAL
 	bool "Generic clock cooling support"
 	depends on COMMON_CLK
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 69d0f43..a81cd92 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/cpu_cooling.h>
+#include <linux/cpu_idle_cooling.h>
 
 #include <trace/events/thermal.h>
 
@@ -649,6 +650,31 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 	return ret;
 }
 
+#ifdef CONFIG_CPU_THERMAL_COMBO
+static void idle_cooling_freq_adjust(
+			struct cpufreq_cooling_device *cpufreq_device,
+			u32 power, unsigned int *target_freq)
+{
+	unsigned long target_load, max_idle_ratio;
+	unsigned int idle_freq;
+	s32 cur_dyn_power;
+
+	max_idle_ratio = get_max_idle_state(&cpufreq_device->allowed_cpus);
+	cur_dyn_power = power * 100 / (100 - max_idle_ratio);
+	idle_freq = cpu_power_to_freq(cpufreq_device, cur_dyn_power);
+
+	cur_dyn_power = cpu_freq_to_power(cpufreq_device, idle_freq);
+	/* a zero power entry must not be used as a divisor: no injection */
+	target_load = cur_dyn_power ? (power * 100) / cur_dyn_power : 100;
+	if (target_load < 100 &&
+	    (idle_freq * target_load) >= ((*target_freq) * 100))
+		*target_freq = idle_freq;
+	else
+		target_load = 100;
+	set_idle_state(&cpufreq_device->allowed_cpus, 100 - target_load);
+}
+#endif
+
 /**
  * cpufreq_power2state() - convert power to a cooling device state
  * @cdev:	&thermal_cooling_device pointer
@@ -696,6 +722,11 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 	normalised_power = (dyn_power * 100) / last_load;
 	target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
 
+#ifdef CONFIG_CPU_THERMAL_COMBO
+	idle_cooling_freq_adjust(cpufreq_device,
+			normalised_power, &target_freq);
+#endif
+
 	*state = cpufreq_cooling_get_level(cpu, target_freq);
 	if (*state == THERMAL_CSTATE_INVALID) {
 		dev_err_ratelimited(&cdev->device,
diff --git a/drivers/thermal/cpu_idle_cooling.c b/drivers/thermal/cpu_idle_cooling.c
index 89a15c5..4a1844d 100644
--- a/drivers/thermal/cpu_idle_cooling.c
+++ b/drivers/thermal/cpu_idle_cooling.c
@@ -28,6 +28,7 @@
 #include <linux/cpumask.h>
 #include <linux/cpuidle.h>
 #include <linux/thermal.h>
+#include <linux/cpu_idle_cooling.h>
 #include <linux/sched.h>
 #include <uapi/linux/sched/types.h>
 #include <linux/slab.h>
@@ -35,7 +36,11 @@
 #include <linux/wait.h>
 #include <linux/sched/rt.h>
 
+#ifdef CONFIG_CPU_THERMAL_COMBO
+#define MAX_TARGET_RATIO		(20U)
+#else
 #define MAX_TARGET_RATIO		(50U)
+#endif
 
 #define DEFAULT_WINDOW_SIZE		(1)
 #define DEFAULT_DURATION_JIFFIES	(20)
diff --git a/include/linux/cpu_idle_cooling.h b/include/linux/cpu_idle_cooling.h
new file mode 100644
index 0000000..da5f19a
--- /dev/null
+++ b/include/linux/cpu_idle_cooling.h
@@ -0,0 +1,38 @@
+/*
+ *  linux/include/linux/cpu_idle_cooling.h
+ *
+ *  Copyright (C) 2017  Tao Wang <kevin.wangtao@hisilicon.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __CPU_IDLE_COOLING_H__
+#define __CPU_IDLE_COOLING_H__
+
+#include <linux/cpumask.h>
+
+#ifdef CONFIG_CPU_IDLE_THERMAL
+unsigned long get_max_idle_state(const struct cpumask *clip_cpus);
+void set_idle_state(const struct cpumask *clip_cpus,
+			unsigned long idle_ratio);
+#else
+static inline unsigned long get_max_idle_state(const struct cpumask *clip_cpus)
+{
+	return 0;
+}
+
+static inline void set_idle_state(const struct cpumask *clip_cpus,
+			unsigned long idle_ratio) {}
+#endif	/* CONFIG_CPU_IDLE_THERMAL */
+
+#endif /* __CPU_IDLE_COOLING_H__ */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-05  9:07 [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Tao Wang
  2017-06-05  9:07 ` [PATCH RFC 2/2] thermal/cpu idle cooling: cpu idle cooling cooperate with cpu cooling Tao Wang
@ 2017-06-06  3:41 ` Viresh Kumar
  2017-06-07 21:50   ` Daniel Lezcano
                     ` (2 more replies)
  2017-06-08  7:19 ` Vincent Guittot
  2 siblings, 3 replies; 13+ messages in thread
From: Viresh Kumar @ 2017-06-06  3:41 UTC (permalink / raw)
  To: Tao Wang
  Cc: rui.zhang, edubezval, amit.kachhap, javi.merino, linux-kernel,
	linux-pm, sunzhaosheng, vincent.guittot, jean.wangtao,
	Daniel Lezcano

+ Daniel

On 05-06-17, 17:07, Tao Wang wrote:
> cpu idle cooling driver performs synchronized idle injection across
> all cpu in same cluster, offers a new method to cooling down cpu,
> that is similar to intel_power_clamp driver, but is basically
> designed for ARM platform.
> Each cluster has its own idle cooling device, each core has its own
> idle injection thread, idle injection thread use play_idle to enter
> idle. In order to reach deepest idle state, all cores are aligned by
> jiffies. the injected idle ratio can be controlled through cooling
> device interface.
> 
> Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
> ---
>  drivers/thermal/Kconfig            |   13 +
>  drivers/thermal/Makefile           |    3 +
>  drivers/thermal/cpu_idle_cooling.c |  648 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 664 insertions(+)
>  create mode 100644 drivers/thermal/cpu_idle_cooling.c
> 
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index b5b5fac..f78e85c 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -154,6 +154,19 @@ config CPU_THERMAL
>  
>  	  If you want this support, you should say Y here.
>  
> +config CPU_IDLE_THERMAL
> +	tristate "generic cpu idle cooling support"
> +	depends on CPU_FREQ
> +	help
> +	  This implements the generic cpu cooling mechanism through idle
> +	  injection.
> +
> +	  This will throttle cpu by injecting specified idle time in
> +	  a fixed cycle. All cpu in same cluster will enter idle synchronously
> +	  to reach deepest idle state when injecting idle.
> +
> +	  If you want this support, you should say Y here.
> +
>  config CLOCK_THERMAL
>  	bool "Generic clock cooling support"
>  	depends on COMMON_CLK
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 094d703..a4db66e 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -26,6 +26,9 @@ thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
>  # devfreq cooling
>  thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
>  
> +# cpu idle cooling
> +obj-$(CONFIG_CPU_IDLE_THERMAL)	+= cpu_idle_cooling.o
> +
>  # platform thermal drivers
>  obj-y				+= broadcom/
>  obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)	+= qcom-spmi-temp-alarm.o
> diff --git a/drivers/thermal/cpu_idle_cooling.c b/drivers/thermal/cpu_idle_cooling.c
> new file mode 100644
> index 0000000..89a15c5
> --- /dev/null
> +++ b/drivers/thermal/cpu_idle_cooling.c
> @@ -0,0 +1,648 @@
> +/*
> + *  linux/drivers/thermal/cpu_idle_cooling.c
> + *
> + *  Copyright (C) 2017  Tao Wang <kevin.wangtao@hisilicon.com>
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; version 2 of the License.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include <linux/kernel_stat.h>
> +#include <linux/delay.h>
> +#include <linux/kthread.h>
> +#include <linux/freezer.h>
> +#include <linux/cpu.h>
> +#include <linux/topology.h>
> +#include <linux/cpufreq.h>
> +#include <linux/cpumask.h>
> +#include <linux/cpuidle.h>
> +#include <linux/thermal.h>
> +#include <linux/sched.h>
> +#include <uapi/linux/sched/types.h>
> +#include <linux/slab.h>
> +#include <linux/tick.h>
> +#include <linux/wait.h>
> +#include <linux/sched/rt.h>
> +
> +#define MAX_TARGET_RATIO		(50U)
> +
> +#define DEFAULT_WINDOW_SIZE		(1)
> +#define DEFAULT_DURATION_JIFFIES	(20)
> +
> +struct cpu_idle_cooling_device {
> +	int id;
> +	struct thermal_cooling_device *cooling_dev;
> +	wait_queue_head_t wait_queue;
> +
> +	/* The cpu assigned to collect stat and update
> +	 * control parameters. default to BSP but BSP
> +	 * can be offlined.
> +	 */
> +	unsigned long control_cpu;
> +
> +	unsigned int set_target_ratio;
> +	unsigned int current_ratio;
> +	unsigned int control_ratio;
> +	unsigned int duration;
> +	unsigned int window_size;
> +
> +	cpumask_var_t related_cpus;
> +	cpumask_var_t injected_cpus;
> +	struct list_head node;
> +	bool should_skip;
> +	bool clamping;
> +};
> +
> +static LIST_HEAD(cpu_idle_cooling_dev_list);
> +static DEFINE_PER_CPU(struct task_struct *, idle_injection_thread_ptr);
> +static DEFINE_MUTEX(cpu_idle_cooling_lock);
> +
> +unsigned long idle_time[NR_CPUS] = {0};
> +unsigned long time_stamp[NR_CPUS] = {0};
> +static enum cpuhp_state hp_state;
> +
> +#define STORE_PARAM(param, min, max)			\
> +static ssize_t store_##param(struct device *dev,	\
> +	struct device_attribute *attr,			\
> +	const char *buf, size_t count)			\
> +{									\
> +	unsigned int new_value;						\
> +	struct thermal_cooling_device *cdev;				\
> +	struct cpu_idle_cooling_device *idle_cooling_dev;		\
> +									\
> +	if (dev == NULL || attr == NULL)				\
> +		return 0;						\
> +									\
> +	if (kstrtouint(buf, 10, &new_value))				\
> +		return -EINVAL;						\
> +									\
> +	if (new_value > max || new_value < min) {			\
> +		pr_err("Out of range %u, between %d-%d\n",		\
> +			new_value, min, max);				\
> +		return -EINVAL;						\
> +	}								\
> +									\
> +	cdev = container_of(dev, struct thermal_cooling_device, device);\
> +	idle_cooling_dev = cdev->devdata;				\
> +	idle_cooling_dev->param = new_value;				\
> +									\
> +	/* make new value visible to other cpus */			\
> +	smp_mb();							\
> +									\
> +	return count;							\
> +}
> +
> +STORE_PARAM(duration, 10, 500);
> +STORE_PARAM(window_size, 1, 10);
> +
> +#define SHOW_PARAM(param)				\
> +static ssize_t show_##param(struct device *dev,		\
> +	struct device_attribute *attr, char *buf)	\
> +{									\
> +	struct thermal_cooling_device *cdev;				\
> +	struct cpu_idle_cooling_device *idle_cooling_dev;		\
> +									\
> +	if (dev == NULL || attr == NULL)				\
> +		return 0;						\
> +									\
> +	cdev = container_of(dev, struct thermal_cooling_device, device);\
> +	idle_cooling_dev = cdev->devdata;				\
> +									\
> +	return snprintf(buf, 12UL, "%d\n",				\
> +					idle_cooling_dev->param);	\
> +}
> +
> +SHOW_PARAM(duration);
> +SHOW_PARAM(window_size);
> +
> +static DEVICE_ATTR(duration, 0644, show_duration, store_duration);
> +static DEVICE_ATTR(window_size, 0644, show_window_size, store_window_size);
> +
> +static struct cpu_idle_cooling_device *
> +get_cpu_idle_cooling_dev(unsigned long cpu)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev;
> +
> +	list_for_each_entry(idle_cooling_dev,
> +		&cpu_idle_cooling_dev_list, node) {
> +		if (cpumask_test_cpu(cpu, idle_cooling_dev->related_cpus))
> +			return idle_cooling_dev;
> +	}
> +
> +	return NULL;
> +}
> +
> +#define K_P		10
> +#define MAX_COMP	10
> +static unsigned int get_compensation(unsigned int current_ratio,
> +		unsigned int target_ratio, unsigned int control_ratio)
> +{
> +	unsigned int comp;
> +
> +	comp = abs(current_ratio - target_ratio) * K_P / 10;
> +	if (comp > MAX_COMP)
> +		comp = MAX_COMP;
> +
> +	if (current_ratio > target_ratio) {
> +		if (control_ratio > comp)
> +			comp = control_ratio - comp;
> +		else
> +			comp = 1;
> +	} else {
> +		if (control_ratio + comp < MAX_TARGET_RATIO)
> +			comp = control_ratio + comp;
> +		else
> +			comp = MAX_TARGET_RATIO;
> +
> +		if (comp > (target_ratio * 6 / 5))
> +			comp = target_ratio * 6 / 5;
> +	}
> +
> +	return comp;
> +}
> +
> +static void update_stats(struct cpu_idle_cooling_device *idle_cooling_dev)
> +{
> +	unsigned long cpu;
> +	u64 now, now_idle, delta_time, delta_idle;
> +	u64 min_idle_ratio = 100;
> +	u64 idle_ratio = 0;
> +
> +	for_each_cpu(cpu, idle_cooling_dev->related_cpus) {
> +		now_idle = get_cpu_idle_time(cpu, &now, 0);
> +		delta_idle = now_idle - idle_time[cpu];
> +		delta_time = now - time_stamp[cpu];
> +		idle_time[cpu] = now_idle;
> +		time_stamp[cpu] = now;
> +
> +		if (delta_idle >= delta_time || !cpu_online(cpu))
> +			now_idle = 100;
> +		else if (delta_time)
> +			now_idle = div64_u64(100 * delta_idle, delta_time);
> +		else
> +			return;
> +
> +		if (now_idle < min_idle_ratio)
> +			min_idle_ratio = now_idle;
> +
> +		idle_ratio += now_idle;
> +	}
> +
> +	idle_ratio /= cpumask_weight(idle_cooling_dev->related_cpus);
> +	if (idle_ratio > MAX_TARGET_RATIO)
> +		idle_ratio = min_idle_ratio;
> +
> +	if (idle_cooling_dev->should_skip)
> +		idle_ratio = (idle_cooling_dev->current_ratio + idle_ratio) / 2;
> +
> +	idle_cooling_dev->current_ratio = (unsigned int)idle_ratio;
> +	idle_cooling_dev->control_ratio = get_compensation(idle_ratio,
> +				idle_cooling_dev->set_target_ratio,
> +				idle_cooling_dev->control_ratio);
> +	idle_cooling_dev->should_skip =
> +			(idle_ratio > (2 * idle_cooling_dev->set_target_ratio));
> +	/* make new control_ratio and should skip flag visible to other cpus */
> +	smp_mb();
> +}
> +
> +static void inject_idle_fn(struct cpu_idle_cooling_device *idle_cooling_dev)
> +{
> +	long sleeptime, guard;
> +	unsigned int interval_ms; /* jiffies to sleep for each attempt */
> +	unsigned long target_jiffies;
> +	unsigned int duration_ms = idle_cooling_dev->duration;
> +	unsigned long duration_jiffies = msecs_to_jiffies(duration_ms);
> +
> +	guard = DIV_ROUND_UP(duration_jiffies * (90 - MAX_TARGET_RATIO), 100);
> +
> +	/* align idle time */
> +	target_jiffies = roundup(jiffies, duration_jiffies);
> +	sleeptime = target_jiffies - jiffies;
> +	if (sleeptime < guard)
> +		sleeptime += duration_jiffies;
> +
> +	if (sleeptime > 0)
> +		schedule_timeout_interruptible(sleeptime);
> +
> +	interval_ms = duration_ms * idle_cooling_dev->control_ratio / 100;
> +
> +	if (idle_cooling_dev->should_skip)
> +		return;
> +
> +	if (interval_ms)
> +		play_idle(interval_ms);
> +}
> +
> +static int idle_injection_thread(void *arg)
> +{
> +	unsigned long cpunr = (unsigned long)arg;
> +	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 };
> +	unsigned int count = 0;
> +	struct cpu_idle_cooling_device *idle_cooling_dev;
> +
> +	set_freezable();
> +
> +	sched_setscheduler(current, SCHED_FIFO, &param);
> +
> +	mutex_lock(&cpu_idle_cooling_lock);
> +	idle_cooling_dev = get_cpu_idle_cooling_dev(cpunr);
> +	mutex_unlock(&cpu_idle_cooling_lock);
> +
> +	while (!kthread_should_stop()) {
> +		wait_event_interruptible(idle_cooling_dev->wait_queue,
> +			(idle_cooling_dev->clamping && cpu_online(cpunr)) ||
> +			kthread_should_stop());
> +
> +		if (kthread_should_stop())
> +			break;
> +
> +		/* rebind thread to cpu */
> +		if (set_cpus_allowed_ptr(current, cpumask_of(cpunr)))
> +			continue;
> +
> +		try_to_freeze();
> +
> +		while (idle_cooling_dev->clamping &&
> +			cpu_online(cpunr)) {
> +			try_to_freeze();
> +
> +			count++;
> +			/*
> +			 * only elected controlling cpu can collect stats
> +			 * and update control parameters.
> +			 */
> +			if (cpunr == idle_cooling_dev->control_cpu
> +				&& !(count % idle_cooling_dev->window_size))
> +				update_stats(idle_cooling_dev);
> +
> +			inject_idle_fn(idle_cooling_dev);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int create_idle_thread(struct cpu_idle_cooling_device *idle_cooling_dev)
> +{
> +	unsigned long cpu;
> +	struct task_struct *thread;
> +
> +	init_waitqueue_head(&idle_cooling_dev->wait_queue);
> +
> +	/* start one thread per online cpu */
> +	for_each_cpu(cpu, idle_cooling_dev->related_cpus) {
> +		thread = kthread_create_on_node(idle_injection_thread,
> +						(void *) cpu,
> +						cpu_to_node(cpu),
> +						"kidle_inject/%lu", cpu);
> +		/* bind to cpu here */
> +		if (likely(!IS_ERR(thread))) {
> +			cpumask_set_cpu(cpu, idle_cooling_dev->injected_cpus);
> +			kthread_bind(thread, cpu);
> +			wake_up_process(thread);
> +			per_cpu(idle_injection_thread_ptr, cpu) = thread;
> +		} else {
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void stop_idle_thread(struct cpu_idle_cooling_device *idle_cooling_dev)
> +{
> +	unsigned long cpu;
> +	struct task_struct **percpu_thread;
> +
> +	idle_cooling_dev->clamping = false;
> +	/*
> +	 * make clamping visible to other cpus and give per cpu threads
> +	 * sometime to exit, or gets killed later.
> +	 */
> +	smp_mb();
> +	msleep(idle_cooling_dev->duration);
> +	for_each_cpu(cpu, idle_cooling_dev->injected_cpus) {
> +		pr_debug("idle inject thread for cpu %lu alive, kill\n", cpu);
> +		percpu_thread = per_cpu_ptr(&idle_injection_thread_ptr, cpu);
> +		if (!IS_ERR_OR_NULL(*percpu_thread)) {
> +			kthread_stop(*percpu_thread);
> +			*percpu_thread = NULL;
> +		}
> +		cpumask_clear_cpu(cpu, idle_cooling_dev->injected_cpus);
> +	}
> +}
> +
> +static int idle_injection_cpu_online(unsigned int cpu)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev;
> +
> +	idle_cooling_dev = get_cpu_idle_cooling_dev(cpu);
> +	if (idle_cooling_dev) {
> +		/* prefer BSP as controlling CPU */
> +		if (cpu == cpumask_first(idle_cooling_dev->injected_cpus)
> +			|| !cpu_online(idle_cooling_dev->control_cpu)) {
> +			idle_cooling_dev->control_cpu = cpu;
> +			/* make new control_cpu visible to other cpus */
> +			smp_mb();
> +		}
> +		wake_up_interruptible(&idle_cooling_dev->wait_queue);
> +	}
> +
> +	return 0;
> +}
> +
> +static int idle_injection_cpu_predown(unsigned int cpu)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev;
> +
> +	idle_cooling_dev = get_cpu_idle_cooling_dev(cpu);
> +	if (idle_cooling_dev) {
> +		if (cpu == idle_cooling_dev->control_cpu) {
> +			cpu = cpumask_next_and(-1,
> +				idle_cooling_dev->injected_cpus,
> +				cpu_online_mask);
> +
> +			if (cpu < nr_cpu_ids)
> +				idle_cooling_dev->control_cpu = cpu;
> +			/* make new control_cpu visible to other cpus */
> +			smp_mb();
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int idle_get_max_state(struct thermal_cooling_device *cdev,
> +				 unsigned long *state)
> +{
> +	*state = MAX_TARGET_RATIO;
> +
> +	return 0;
> +}
> +
> +static int idle_get_cur_state(struct thermal_cooling_device *cdev,
> +				 unsigned long *state)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev = cdev->devdata;
> +
> +	if (true == idle_cooling_dev->clamping)
> +		*state = (unsigned long)idle_cooling_dev->current_ratio;
> +	else
> +		*state = 0; /* indicates invalid state */
> +
> +	return 0;
> +}
> +
> +static int idle_set_cur_state(struct thermal_cooling_device *cdev,
> +				 unsigned long new_target_ratio)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev = cdev->devdata;
> +	int ret = 0;
> +
> +	mutex_lock(&cdev->lock);
> +
> +	new_target_ratio = clamp(new_target_ratio, 0UL,
> +				(unsigned long) MAX_TARGET_RATIO);
> +	if (idle_cooling_dev->set_target_ratio == 0
> +		&& new_target_ratio > 0) {
> +		idle_cooling_dev->set_target_ratio =
> +			(unsigned int) new_target_ratio;
> +		idle_cooling_dev->control_ratio =
> +			idle_cooling_dev->set_target_ratio;
> +		idle_cooling_dev->current_ratio =
> +			idle_cooling_dev->set_target_ratio;
> +		idle_cooling_dev->clamping = true;
> +		wake_up_interruptible(&idle_cooling_dev->wait_queue);
> +	} else if (idle_cooling_dev->set_target_ratio > 0) {
> +		if (new_target_ratio == 0) {
> +			idle_cooling_dev->set_target_ratio = 0;
> +			idle_cooling_dev->clamping = false;
> +			/* make clamping visible to other cpus */
> +			smp_mb();
> +		} else	/* adjust currently running */ {
> +			idle_cooling_dev->set_target_ratio =
> +				(unsigned int) new_target_ratio;
> +			/* make new set_target_ratio visible to other cpus */
> +			smp_mb();
> +		}
> +	}
> +
> +	mutex_unlock(&cdev->lock);
> +
> +	return ret;
> +}
> +
> +static struct thermal_cooling_device_ops cpu_idle_injection_cooling_ops = {
> +	.get_max_state = idle_get_max_state,
> +	.get_cur_state = idle_get_cur_state,
> +	.set_cur_state = idle_set_cur_state,
> +};
> +
> +unsigned long get_max_idle_state(const struct cpumask *clip_cpus)
> +{
> +	return MAX_TARGET_RATIO;
> +}
> +EXPORT_SYMBOL_GPL(get_max_idle_state);
> +
> +void set_idle_state(const struct cpumask *clip_cpus, unsigned long idle_ratio)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev;
> +
> +	mutex_lock(&cpu_idle_cooling_lock);
> +	list_for_each_entry(idle_cooling_dev,
> +		&cpu_idle_cooling_dev_list, node) {
> +		if (cpumask_subset(idle_cooling_dev->related_cpus, clip_cpus))
> +			idle_set_cur_state(idle_cooling_dev->cooling_dev,
> +					idle_ratio);
> +	}
> +	mutex_unlock(&cpu_idle_cooling_lock);
> +}
> +EXPORT_SYMBOL_GPL(set_idle_state);
> +
> +struct thermal_cooling_device * __init
> +cpu_idle_cooling_register(const struct cpumask *clip_cpus)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev;
> +	struct thermal_cooling_device *ret;
> +	unsigned long cpu;
> +	char dev_name[THERMAL_NAME_LENGTH];
> +
> +	if (cpumask_empty(clip_cpus))
> +		return ERR_PTR(-ENOMEM);
> +
> +	mutex_lock(&cpu_idle_cooling_lock);
> +	get_online_cpus();
> +	list_for_each_entry(idle_cooling_dev,
> +		&cpu_idle_cooling_dev_list, node) {
> +		if (cpumask_intersects(idle_cooling_dev->related_cpus,
> +			clip_cpus)) {
> +			ret = ERR_PTR(-EINVAL);
> +			goto exit_unlock;
> +		}
> +	}
> +
> +	idle_cooling_dev = kzalloc(sizeof(*idle_cooling_dev), GFP_KERNEL);
> +	if (!idle_cooling_dev) {
> +		ret = ERR_PTR(-ENOMEM);
> +		goto exit_unlock;
> +	}
> +
> +	if (!zalloc_cpumask_var(&idle_cooling_dev->related_cpus, GFP_KERNEL)) {
> +		ret = ERR_PTR(-ENOMEM);
> +		goto exit_free_dev;
> +	}
> +
> +	if (!zalloc_cpumask_var(&idle_cooling_dev->injected_cpus, GFP_KERNEL)) {
> +		ret = ERR_PTR(-ENOMEM);
> +		goto exit_free_related_cpus;
> +	}
> +
> +	cpumask_copy(idle_cooling_dev->related_cpus, clip_cpus);
> +	cpu = cpumask_first(clip_cpus);
> +	idle_cooling_dev->control_cpu = cpu;
> +	idle_cooling_dev->id = topology_physical_package_id(cpu);
> +	idle_cooling_dev->window_size = DEFAULT_WINDOW_SIZE;
> +	idle_cooling_dev->duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
> +
> +	if (create_idle_thread(idle_cooling_dev)) {
> +		ret = ERR_PTR(-ENOMEM);
> +		goto exit_free_injected_cpus;
> +	}
> +
> +	snprintf(dev_name, sizeof(dev_name), "thermal-cpuidle-%d",
> +		 idle_cooling_dev->id);
> +	ret = thermal_cooling_device_register(dev_name,
> +					idle_cooling_dev,
> +					&cpu_idle_injection_cooling_ops);
> +	if (IS_ERR(ret))
> +		goto exit_stop_thread;
> +
> +	idle_cooling_dev->cooling_dev = ret;
> +
> +	if (device_create_file(&idle_cooling_dev->cooling_dev->device,
> +		&dev_attr_duration)) {
> +		ret = ERR_PTR(-ENOMEM);
> +		goto exit_unregister_cdev;
> +	}
> +
> +	if (device_create_file(&idle_cooling_dev->cooling_dev->device,
> +		&dev_attr_window_size)) {
> +		ret = ERR_PTR(-ENOMEM);
> +		goto exit_remove_duration_attr;
> +	}
> +
> +	list_add(&idle_cooling_dev->node, &cpu_idle_cooling_dev_list);
> +
> +	goto exit_unlock;
> +
> +exit_remove_duration_attr:
> +	device_remove_file(&idle_cooling_dev->cooling_dev->device,
> +			&dev_attr_duration);
> +exit_unregister_cdev:
> +	thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
> +exit_stop_thread:
> +	stop_idle_thread(idle_cooling_dev);
> +exit_free_injected_cpus:
> +	free_cpumask_var(idle_cooling_dev->injected_cpus);
> +exit_free_related_cpus:
> +	free_cpumask_var(idle_cooling_dev->related_cpus);
> +exit_free_dev:
> +	kfree(idle_cooling_dev);
> +exit_unlock:
> +	put_online_cpus();
> +	mutex_unlock(&cpu_idle_cooling_lock);
> +	return ret;
> +}
> +
> +void cpu_idle_cooling_unregister(struct thermal_cooling_device *cdev)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev;
> +
> +	if (IS_ERR_OR_NULL(cdev))
> +		return;
> +
> +	idle_cooling_dev = cdev->devdata;
> +
> +	mutex_lock(&cpu_idle_cooling_lock);
> +	get_online_cpus();
> +	list_del(&idle_cooling_dev->node);
> +	put_online_cpus();
> +	mutex_unlock(&cpu_idle_cooling_lock);
> +
> +	device_remove_file(&cdev->device, &dev_attr_window_size);
> +	device_remove_file(&cdev->device, &dev_attr_duration);
> +	thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
> +
> +	stop_idle_thread(idle_cooling_dev);
> +	free_cpumask_var(idle_cooling_dev->injected_cpus);
> +	free_cpumask_var(idle_cooling_dev->related_cpus);
> +	kfree(idle_cooling_dev);
> +}
> +
> +static void __cpu_idle_cooling_exit(void)
> +{
> +	struct cpu_idle_cooling_device *idle_cooling_dev;
> +
> +	while (!list_empty(&cpu_idle_cooling_dev_list)) {
> +		idle_cooling_dev = list_first_entry(&cpu_idle_cooling_dev_list,
> +				struct cpu_idle_cooling_device, node);
> +		cpu_idle_cooling_unregister(idle_cooling_dev->cooling_dev);
> +	}
> +
> +	if (hp_state > 0)
> +		cpuhp_remove_state_nocalls(hp_state);
> +}
> +
> +static int __init cpu_idle_cooling_init(void)
> +{
> +	struct thermal_cooling_device *ret;
> +	cpumask_t rest_cpu_mask = CPU_MASK_ALL;
> +	const struct cpumask *register_cpu_mask;
> +
> +	hp_state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> +			"thermal/cpu_idle_cooling:online",
> +			idle_injection_cpu_online,
> +			idle_injection_cpu_predown);
> +	if (hp_state < 0)
> +		return hp_state;
> +
> +	do {
> +		register_cpu_mask =
> +			topology_core_cpumask(cpumask_first(&rest_cpu_mask));
> +
> +		if (cpumask_empty(register_cpu_mask))
> +			break;
> +
> +		ret = cpu_idle_cooling_register(register_cpu_mask);
> +		if (IS_ERR(ret)) {
> +			__cpu_idle_cooling_exit();
> +			return -ENOMEM;
> +		}
> +	} while (cpumask_andnot(&rest_cpu_mask,
> +				&rest_cpu_mask,
> +				register_cpu_mask));
> +
> +	return 0;
> +}
> +module_init(cpu_idle_cooling_init);
> +
> +static void __exit cpu_idle_cooling_exit(void)
> +{
> +	__cpu_idle_cooling_exit();
> +}
> +module_exit(cpu_idle_cooling_exit);
> +
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR("Tao Wang <kevin.wangtao@hisilicon.com>");
> +MODULE_DESCRIPTION("CPU Idle Cooling Driver for ARM Platform");
> -- 
> 1.7.9.5

-- 
viresh

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-06  3:41 ` [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Viresh Kumar
@ 2017-06-07 21:50   ` Daniel Lezcano
  2017-06-07 21:59     ` Rafael J. Wysocki
  2017-06-08 12:04   ` Daniel Lezcano
  2017-06-09  8:20   ` Daniel Lezcano
  2 siblings, 1 reply; 13+ messages in thread
From: Daniel Lezcano @ 2017-06-07 21:50 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Tao Wang, rui.zhang, edubezval, amit.kachhap, javi.merino,
	linux-kernel, linux-pm, sunzhaosheng, vincent.guittot,
	jean.wangtao

On Tue, Jun 06, 2017 at 09:11:35AM +0530, viresh kumar wrote:
> + Daniel

Hi Viresh,

Thanks for the heads-up.

Before going deep into the review, I have a dumb question:

Why isn't this mechanism implemented at the scheduler level?


> On 05-06-17, 17:07, Tao Wang wrote:
> > cpu idle cooling driver performs synchronized idle injection across
> > all cpu in same cluster, offers a new method to cooling down cpu,
> > that is similar to intel_power_clamp driver, but is basically
> > designed for ARM platform.
> > Each cluster has its own idle cooling device, each core has its own
> > idle injection thread, idle injection thread use play_idle to enter
> > idle. In order to reach deepest idle state, all cores are aligned by
> > jiffies. the injected idle ratio can be controlled through cooling
> > device interface.
> > 
> > Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
> > ---
> >  drivers/thermal/Kconfig            |   13 +
> >  drivers/thermal/Makefile           |    3 +
> >  drivers/thermal/cpu_idle_cooling.c |  648 ++++++++++++++++++++++++++++++++++++
> >  3 files changed, 664 insertions(+)
> >  create mode 100644 drivers/thermal/cpu_idle_cooling.c
> > 
> > diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> > index b5b5fac..f78e85c 100644
> > --- a/drivers/thermal/Kconfig
> > +++ b/drivers/thermal/Kconfig
> > @@ -154,6 +154,19 @@ config CPU_THERMAL
> >  
> >  	  If you want this support, you should say Y here.
> >  
> > +config CPU_IDLE_THERMAL
> > +	tristate "generic cpu idle cooling support"
> > +	depends on CPU_FREQ
> > +	help
> > +	  This implements the generic cpu cooling mechanism through idle
> > +	  injection.
> > +
> > +	  This will throttle cpu by injecting specified idle time in
> > +	  a fixed cycle. All cpu in same cluster will enter idle synchronously
> > +	  to reach deepest idle state when injecting idle.
> > +
> > +	  If you want this support, you should say Y here.
> > +
> >  config CLOCK_THERMAL
> >  	bool "Generic clock cooling support"
> >  	depends on COMMON_CLK
> > diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> > index 094d703..a4db66e 100644
> > --- a/drivers/thermal/Makefile
> > +++ b/drivers/thermal/Makefile
> > @@ -26,6 +26,9 @@ thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
> >  # devfreq cooling
> >  thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
> >  
> > +# cpu idle cooling
> > +obj-$(CONFIG_CPU_IDLE_THERMAL)	+= cpu_idle_cooling.o
> > +
> >  # platform thermal drivers
> >  obj-y				+= broadcom/
> >  obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)	+= qcom-spmi-temp-alarm.o
> > diff --git a/drivers/thermal/cpu_idle_cooling.c b/drivers/thermal/cpu_idle_cooling.c
> > new file mode 100644
> > index 0000000..89a15c5
> > --- /dev/null
> > +++ b/drivers/thermal/cpu_idle_cooling.c
> > @@ -0,0 +1,648 @@
> > +/*
> > + *  linux/drivers/thermal/cpu_idle_cooling.c
> > + *
> > + *  Copyright (C) 2017  Tao Wang <kevin.wangtao@hisilicon.com>
> > + *
> > + *  This program is free software; you can redistribute it and/or modify
> > + *  it under the terms of the GNU General Public License as published by
> > + *  the Free Software Foundation; version 2 of the License.
> > + *
> > + *  This program is distributed in the hope that it will be useful,
> > + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + *  GNU General Public License for more details.
> > + *
> > + *  You should have received a copy of the GNU General Public License
> > + *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <linux/kernel.h>
> > +#include <linux/kernel_stat.h>
> > +#include <linux/delay.h>
> > +#include <linux/kthread.h>
> > +#include <linux/freezer.h>
> > +#include <linux/cpu.h>
> > +#include <linux/topology.h>
> > +#include <linux/cpufreq.h>
> > +#include <linux/cpumask.h>
> > +#include <linux/cpuidle.h>
> > +#include <linux/thermal.h>
> > +#include <linux/sched.h>
> > +#include <uapi/linux/sched/types.h>
> > +#include <linux/slab.h>
> > +#include <linux/tick.h>
> > +#include <linux/wait.h>
> > +#include <linux/sched/rt.h>
> > +
> > +#define MAX_TARGET_RATIO		(50U)
> > +
> > +#define DEFAULT_WINDOW_SIZE		(1)
> > +#define DEFAULT_DURATION_JIFFIES	(20)
> > +
> > +struct cpu_idle_cooling_device {
> > +	int id;
> > +	struct thermal_cooling_device *cooling_dev;
> > +	wait_queue_head_t wait_queue;
> > +
> > +	/* The cpu assigned to collect stat and update
> > +	 * control parameters. default to BSP but BSP
> > +	 * can be offlined.
> > +	 */
> > +	unsigned long control_cpu;
> > +
> > +	unsigned int set_target_ratio;
> > +	unsigned int current_ratio;
> > +	unsigned int control_ratio;
> > +	unsigned int duration;
> > +	unsigned int window_size;
> > +
> > +	cpumask_var_t related_cpus;
> > +	cpumask_var_t injected_cpus;
> > +	struct list_head node;
> > +	bool should_skip;
> > +	bool clamping;
> > +};
> > +
> > +static LIST_HEAD(cpu_idle_cooling_dev_list);
> > +static DEFINE_PER_CPU(struct task_struct *, idle_injection_thread_ptr);
> > +static DEFINE_MUTEX(cpu_idle_cooling_lock);
> > +
> > +unsigned long idle_time[NR_CPUS] = {0};
> > +unsigned long time_stamp[NR_CPUS] = {0};
> > +static enum cpuhp_state hp_state;
> > +
> > +#define STORE_PARAM(param, min, max)			\
> > +static ssize_t store_##param(struct device *dev,	\
> > +	struct device_attribute *attr,			\
> > +	const char *buf, size_t count)			\
> > +{									\
> > +	unsigned int new_value;						\
> > +	struct thermal_cooling_device *cdev;				\
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;		\
> > +									\
> > +	if (dev == NULL || attr == NULL)				\
> > +		return 0;						\
> > +									\
> > +	if (kstrtouint(buf, 10, &new_value))				\
> > +		return -EINVAL;						\
> > +									\
> > +	if (new_value > max || new_value < min) {			\
> > +		pr_err("Out of range %u, between %d-%d\n",		\
> > +			new_value, min, max);				\
> > +		return -EINVAL;						\
> > +	}								\
> > +									\
> > +	cdev = container_of(dev, struct thermal_cooling_device, device);\
> > +	idle_cooling_dev = cdev->devdata;				\
> > +	idle_cooling_dev->param = new_value;				\
> > +									\
> > +	/* make new value visible to other cpus */			\
> > +	smp_mb();							\
> > +									\
> > +	return count;							\
> > +}
> > +
> > +STORE_PARAM(duration, 10, 500);
> > +STORE_PARAM(window_size, 1, 10);
> > +
> > +#define SHOW_PARAM(param)				\
> > +static ssize_t show_##param(struct device *dev,		\
> > +	struct device_attribute *attr, char *buf)	\
> > +{									\
> > +	struct thermal_cooling_device *cdev;				\
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;		\
> > +									\
> > +	if (dev == NULL || attr == NULL)				\
> > +		return 0;						\
> > +									\
> > +	cdev = container_of(dev, struct thermal_cooling_device, device);\
> > +	idle_cooling_dev = cdev->devdata;				\
> > +									\
> > +	return snprintf(buf, 12UL, "%d\n",				\
> > +					idle_cooling_dev->param);	\
> > +}
> > +
> > +SHOW_PARAM(duration);
> > +SHOW_PARAM(window_size);
> > +
> > +static DEVICE_ATTR(duration, 0644, show_duration, store_duration);
> > +static DEVICE_ATTR(window_size, 0644, show_window_size, store_window_size);
> > +
> > +static struct cpu_idle_cooling_device *
> > +get_cpu_idle_cooling_dev(unsigned long cpu)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	list_for_each_entry(idle_cooling_dev,
> > +		&cpu_idle_cooling_dev_list, node) {
> > +		if (cpumask_test_cpu(cpu, idle_cooling_dev->related_cpus))
> > +			return idle_cooling_dev;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +#define K_P		10
> > +#define MAX_COMP	10
> > +static unsigned int get_compensation(unsigned int current_ratio,
> > +		unsigned int target_ratio, unsigned int control_ratio)
> > +{
> > +	unsigned int comp;
> > +
> > +	comp = abs(current_ratio - target_ratio) * K_P / 10;
> > +	if (comp > MAX_COMP)
> > +		comp = MAX_COMP;
> > +
> > +	if (current_ratio > target_ratio) {
> > +		if (control_ratio > comp)
> > +			comp = control_ratio - comp;
> > +		else
> > +			comp = 1;
> > +	} else {
> > +		if (control_ratio + comp < MAX_TARGET_RATIO)
> > +			comp = control_ratio + comp;
> > +		else
> > +			comp = MAX_TARGET_RATIO;
> > +
> > +		if (comp > (target_ratio * 6 / 5))
> > +			comp = target_ratio * 6 / 5;
> > +	}
> > +
> > +	return comp;
> > +}
> > +
> > +static void update_stats(struct cpu_idle_cooling_device *idle_cooling_dev)
> > +{
> > +	unsigned long cpu;
> > +	u64 now, now_idle, delta_time, delta_idle;
> > +	u64 min_idle_ratio = 100;
> > +	u64 idle_ratio = 0;
> > +
> > +	for_each_cpu(cpu, idle_cooling_dev->related_cpus) {
> > +		now_idle = get_cpu_idle_time(cpu, &now, 0);
> > +		delta_idle = now_idle - idle_time[cpu];
> > +		delta_time = now - time_stamp[cpu];
> > +		idle_time[cpu] = now_idle;
> > +		time_stamp[cpu] = now;
> > +
> > +		if (delta_idle >= delta_time || !cpu_online(cpu))
> > +			now_idle = 100;
> > +		else if (delta_time)
> > +			now_idle = div64_u64(100 * delta_idle, delta_time);
> > +		else
> > +			return;
> > +
> > +		if (now_idle < min_idle_ratio)
> > +			min_idle_ratio = now_idle;
> > +
> > +		idle_ratio += now_idle;
> > +	}
> > +
> > +	idle_ratio /= cpumask_weight(idle_cooling_dev->related_cpus);
> > +	if (idle_ratio > MAX_TARGET_RATIO)
> > +		idle_ratio = min_idle_ratio;
> > +
> > +	if (idle_cooling_dev->should_skip)
> > +		idle_ratio = (idle_cooling_dev->current_ratio + idle_ratio) / 2;
> > +
> > +	idle_cooling_dev->current_ratio = (unsigned int)idle_ratio;
> > +	idle_cooling_dev->control_ratio = get_compensation(idle_ratio,
> > +				idle_cooling_dev->set_target_ratio,
> > +				idle_cooling_dev->control_ratio);
> > +	idle_cooling_dev->should_skip =
> > +			(idle_ratio > (2 * idle_cooling_dev->set_target_ratio));
> > +	/* make new control_ratio and should skip flag visible to other cpus */
> > +	smp_mb();
> > +}
> > +
> > +static void inject_idle_fn(struct cpu_idle_cooling_device *idle_cooling_dev)
> > +{
> > +	long sleeptime, guard;
> > +	unsigned int interval_ms; /* jiffies to sleep for each attempt */
> > +	unsigned long target_jiffies;
> > +	unsigned int duration_ms = idle_cooling_dev->duration;
> > +	unsigned long duration_jiffies = msecs_to_jiffies(duration_ms);
> > +
> > +	guard = DIV_ROUND_UP(duration_jiffies * (90 - MAX_TARGET_RATIO), 100);
> > +
> > +	/* align idle time */
> > +	target_jiffies = roundup(jiffies, duration_jiffies);
> > +	sleeptime = target_jiffies - jiffies;
> > +	if (sleeptime < guard)
> > +		sleeptime += duration_jiffies;
> > +
> > +	if (sleeptime > 0)
> > +		schedule_timeout_interruptible(sleeptime);
> > +
> > +	interval_ms = duration_ms * idle_cooling_dev->control_ratio / 100;
> > +
> > +	if (idle_cooling_dev->should_skip)
> > +		return;
> > +
> > +	if (interval_ms)
> > +		play_idle(interval_ms);
> > +}
> > +
> > +static int idle_injection_thread(void *arg)
> > +{
> > +	unsigned long cpunr = (unsigned long)arg;
> > +	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 };
> > +	unsigned int count = 0;
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	set_freezable();
> > +
> > +	sched_setscheduler(current, SCHED_FIFO, &param);
> > +
> > +	mutex_lock(&cpu_idle_cooling_lock);
> > +	idle_cooling_dev = get_cpu_idle_cooling_dev(cpunr);
> > +	mutex_unlock(&cpu_idle_cooling_lock);
> > +
> > +	while (!kthread_should_stop()) {
> > +		wait_event_interruptible(idle_cooling_dev->wait_queue,
> > +			(idle_cooling_dev->clamping && cpu_online(cpunr)) ||
> > +			kthread_should_stop());
> > +
> > +		if (kthread_should_stop())
> > +			break;
> > +
> > +		/* rebind thread to cpu */
> > +		if (set_cpus_allowed_ptr(current, cpumask_of(cpunr)))
> > +			continue;
> > +
> > +		try_to_freeze();
> > +
> > +		while (idle_cooling_dev->clamping &&
> > +			cpu_online(cpunr)) {
> > +			try_to_freeze();
> > +
> > +			count++;
> > +			/*
> > +			 * only elected controlling cpu can collect stats
> > +			 * and update control parameters.
> > +			 */
> > +			if (cpunr == idle_cooling_dev->control_cpu
> > +				&& !(count % idle_cooling_dev->window_size))
> > +				update_stats(idle_cooling_dev);
> > +
> > +			inject_idle_fn(idle_cooling_dev);
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int create_idle_thread(struct cpu_idle_cooling_device *idle_cooling_dev)
> > +{
> > +	unsigned long cpu;
> > +	struct task_struct *thread;
> > +
> > +	init_waitqueue_head(&idle_cooling_dev->wait_queue);
> > +
> > +	/* start one thread per online cpu */
> > +	for_each_cpu(cpu, idle_cooling_dev->related_cpus) {
> > +		thread = kthread_create_on_node(idle_injection_thread,
> > +						(void *) cpu,
> > +						cpu_to_node(cpu),
> > +						"kidle_inject/%lu", cpu);
> > +		/* bind to cpu here */
> > +		if (likely(!IS_ERR(thread))) {
> > +			cpumask_set_cpu(cpu, idle_cooling_dev->injected_cpus);
> > +			kthread_bind(thread, cpu);
> > +			wake_up_process(thread);
> > +			per_cpu(idle_injection_thread_ptr, cpu) = thread;
> > +		} else {
> > +			return -ENOMEM;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void stop_idle_thread(struct cpu_idle_cooling_device *idle_cooling_dev)
> > +{
> > +	unsigned long cpu;
> > +	struct task_struct **percpu_thread;
> > +
> > +	idle_cooling_dev->clamping = false;
> > +	/*
> > +	 * make clamping visible to other cpus and give per cpu threads
> > +	 * sometime to exit, or gets killed later.
> > +	 */
> > +	smp_mb();
> > +	msleep(idle_cooling_dev->duration);
> > +	for_each_cpu(cpu, idle_cooling_dev->injected_cpus) {
> > +		pr_debug("idle inject thread for cpu %lu alive, kill\n", cpu);
> > +		percpu_thread = per_cpu_ptr(&idle_injection_thread_ptr, cpu);
> > +		if (!IS_ERR_OR_NULL(*percpu_thread)) {
> > +			kthread_stop(*percpu_thread);
> > +			*percpu_thread = NULL;
> > +		}
> > +		cpumask_clear_cpu(cpu, idle_cooling_dev->injected_cpus);
> > +	}
> > +}
> > +
> > +static int idle_injection_cpu_online(unsigned int cpu)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	idle_cooling_dev = get_cpu_idle_cooling_dev(cpu);
> > +	if (idle_cooling_dev) {
> > +		/* prefer BSP as controlling CPU */
> > +		if (cpu == cpumask_first(idle_cooling_dev->injected_cpus)
> > +			|| !cpu_online(idle_cooling_dev->control_cpu)) {
> > +			idle_cooling_dev->control_cpu = cpu;
> > +			/* make new control_cpu visible to other cpus */
> > +			smp_mb();
> > +		}
> > +		wake_up_interruptible(&idle_cooling_dev->wait_queue);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int idle_injection_cpu_predown(unsigned int cpu)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	idle_cooling_dev = get_cpu_idle_cooling_dev(cpu);
> > +	if (idle_cooling_dev) {
> > +		if (cpu == idle_cooling_dev->control_cpu) {
> > +			cpu = cpumask_next_and(-1,
> > +				idle_cooling_dev->injected_cpus,
> > +				cpu_online_mask);
> > +
> > +			if (cpu < nr_cpu_ids)
> > +				idle_cooling_dev->control_cpu = cpu;
> > +			/* make new control_cpu visible to other cpus */
> > +			smp_mb();
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int idle_get_max_state(struct thermal_cooling_device *cdev,
> > +				 unsigned long *state)
> > +{
> > +	*state = MAX_TARGET_RATIO;
> > +
> > +	return 0;
> > +}
> > +
> > +static int idle_get_cur_state(struct thermal_cooling_device *cdev,
> > +				 unsigned long *state)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev = cdev->devdata;
> > +
> > +	if (true == idle_cooling_dev->clamping)
> > +		*state = (unsigned long)idle_cooling_dev->current_ratio;
> > +	else
> > +		*state = 0; /* indicates invalid state */
> > +
> > +	return 0;
> > +}
> > +
> > +static int idle_set_cur_state(struct thermal_cooling_device *cdev,
> > +				 unsigned long new_target_ratio)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev = cdev->devdata;
> > +	int ret = 0;
> > +
> > +	mutex_lock(&cdev->lock);
> > +
> > +	new_target_ratio = clamp(new_target_ratio, 0UL,
> > +				(unsigned long) MAX_TARGET_RATIO);
> > +	if (idle_cooling_dev->set_target_ratio == 0
> > +		&& new_target_ratio > 0) {
> > +		idle_cooling_dev->set_target_ratio =
> > +			(unsigned int) new_target_ratio;
> > +		idle_cooling_dev->control_ratio =
> > +			idle_cooling_dev->set_target_ratio;
> > +		idle_cooling_dev->current_ratio =
> > +			idle_cooling_dev->set_target_ratio;
> > +		idle_cooling_dev->clamping = true;
> > +		wake_up_interruptible(&idle_cooling_dev->wait_queue);
> > +	} else if (idle_cooling_dev->set_target_ratio > 0) {
> > +		if (new_target_ratio == 0) {
> > +			idle_cooling_dev->set_target_ratio = 0;
> > +			idle_cooling_dev->clamping = false;
> > +			/* make clamping visible to other cpus */
> > +			smp_mb();
> > +		} else	/* adjust currently running */ {
> > +			idle_cooling_dev->set_target_ratio =
> > +				(unsigned int) new_target_ratio;
> > +			/* make new set_target_ratio visible to other cpus */
> > +			smp_mb();
> > +		}
> > +	}
> > +
> > +	mutex_unlock(&cdev->lock);
> > +
> > +	return ret;
> > +}
> > +
> > +static struct thermal_cooling_device_ops cpu_idle_injection_cooling_ops = {
> > +	.get_max_state = idle_get_max_state,
> > +	.get_cur_state = idle_get_cur_state,
> > +	.set_cur_state = idle_set_cur_state,
> > +};
> > +
> > +unsigned long get_max_idle_state(const struct cpumask *clip_cpus)
> > +{
> > +	return MAX_TARGET_RATIO;
> > +}
> > +EXPORT_SYMBOL_GPL(get_max_idle_state);
> > +
> > +void set_idle_state(const struct cpumask *clip_cpus, unsigned long idle_ratio)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	mutex_lock(&cpu_idle_cooling_lock);
> > +	list_for_each_entry(idle_cooling_dev,
> > +		&cpu_idle_cooling_dev_list, node) {
> > +		if (cpumask_subset(idle_cooling_dev->related_cpus, clip_cpus))
> > +			idle_set_cur_state(idle_cooling_dev->cooling_dev,
> > +					idle_ratio);
> > +	}
> > +	mutex_unlock(&cpu_idle_cooling_lock);
> > +}
> > +EXPORT_SYMBOL_GPL(set_idle_state);
> > +
> > +struct thermal_cooling_device * __init
> > +cpu_idle_cooling_register(const struct cpumask *clip_cpus)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +	struct thermal_cooling_device *ret;
> > +	unsigned long cpu;
> > +	char dev_name[THERMAL_NAME_LENGTH];
> > +
> > +	if (cpumask_empty(clip_cpus))
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	mutex_lock(&cpu_idle_cooling_lock);
> > +	get_online_cpus();
> > +	list_for_each_entry(idle_cooling_dev,
> > +		&cpu_idle_cooling_dev_list, node) {
> > +		if (cpumask_intersects(idle_cooling_dev->related_cpus,
> > +			clip_cpus)) {
> > +			ret = ERR_PTR(-EINVAL);
> > +			goto exit_unlock;
> > +		}
> > +	}
> > +
> > +	idle_cooling_dev = kzalloc(sizeof(*idle_cooling_dev), GFP_KERNEL);
> > +	if (!idle_cooling_dev) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_unlock;
> > +	}
> > +
> > +	if (!zalloc_cpumask_var(&idle_cooling_dev->related_cpus, GFP_KERNEL)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_free_dev;
> > +	}
> > +
> > +	if (!zalloc_cpumask_var(&idle_cooling_dev->injected_cpus, GFP_KERNEL)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_free_related_cpus;
> > +	}
> > +
> > +	cpumask_copy(idle_cooling_dev->related_cpus, clip_cpus);
> > +	cpu = cpumask_first(clip_cpus);
> > +	idle_cooling_dev->control_cpu = cpu;
> > +	idle_cooling_dev->id = topology_physical_package_id(cpu);
> > +	idle_cooling_dev->window_size = DEFAULT_WINDOW_SIZE;
> > +	idle_cooling_dev->duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
> > +
> > +	if (create_idle_thread(idle_cooling_dev)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_free_injected_cpus;
> > +	}
> > +
> > +	snprintf(dev_name, sizeof(dev_name), "thermal-cpuidle-%d",
> > +		 idle_cooling_dev->id);
> > +	ret = thermal_cooling_device_register(dev_name,
> > +					idle_cooling_dev,
> > +					&cpu_idle_injection_cooling_ops);
> > +	if (IS_ERR(ret))
> > +		goto exit_stop_thread;
> > +
> > +	idle_cooling_dev->cooling_dev = ret;
> > +
> > +	if (device_create_file(&idle_cooling_dev->cooling_dev->device,
> > +		&dev_attr_duration)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_unregister_cdev;
> > +	}
> > +
> > +	if (device_create_file(&idle_cooling_dev->cooling_dev->device,
> > +		&dev_attr_window_size)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_remove_duration_attr;
> > +	}
> > +
> > +	list_add(&idle_cooling_dev->node, &cpu_idle_cooling_dev_list);
> > +
> > +	goto exit_unlock;
> > +
> > +exit_remove_duration_attr:
> > +	device_remove_file(&idle_cooling_dev->cooling_dev->device,
> > +			&dev_attr_duration);
> > +exit_unregister_cdev:
> > +	thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
> > +exit_stop_thread:
> > +	stop_idle_thread(idle_cooling_dev);
> > +exit_free_injected_cpus:
> > +	free_cpumask_var(idle_cooling_dev->injected_cpus);
> > +exit_free_related_cpus:
> > +	free_cpumask_var(idle_cooling_dev->related_cpus);
> > +exit_free_dev:
> > +	kfree(idle_cooling_dev);
> > +exit_unlock:
> > +	put_online_cpus();
> > +	mutex_unlock(&cpu_idle_cooling_lock);
> > +	return ret;
> > +}
> > +
> > +void cpu_idle_cooling_unregister(struct thermal_cooling_device *cdev)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	if (IS_ERR_OR_NULL(cdev))
> > +		return;
> > +
> > +	idle_cooling_dev = cdev->devdata;
> > +
> > +	mutex_lock(&cpu_idle_cooling_lock);
> > +	get_online_cpus();
> > +	list_del(&idle_cooling_dev->node);
> > +	put_online_cpus();
> > +	mutex_unlock(&cpu_idle_cooling_lock);
> > +
> > +	device_remove_file(&cdev->device, &dev_attr_window_size);
> > +	device_remove_file(&cdev->device, &dev_attr_duration);
> > +	thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
> > +
> > +	stop_idle_thread(idle_cooling_dev);
> > +	free_cpumask_var(idle_cooling_dev->injected_cpus);
> > +	free_cpumask_var(idle_cooling_dev->related_cpus);
> > +	kfree(idle_cooling_dev);
> > +}
> > +
> > +static void __cpu_idle_cooling_exit(void)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	while (!list_empty(&cpu_idle_cooling_dev_list)) {
> > +		idle_cooling_dev = list_first_entry(&cpu_idle_cooling_dev_list,
> > +				struct cpu_idle_cooling_device, node);
> > +		cpu_idle_cooling_unregister(idle_cooling_dev->cooling_dev);
> > +	}
> > +
> > +	if (hp_state > 0)
> > +		cpuhp_remove_state_nocalls(hp_state);
> > +}
> > +
> > +static int __init cpu_idle_cooling_init(void)
> > +{
> > +	struct thermal_cooling_device *ret;
> > +	cpumask_t rest_cpu_mask = CPU_MASK_ALL;
> > +	const struct cpumask *register_cpu_mask;
> > +
> > +	hp_state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> > +			"thermal/cpu_idle_cooling:online",
> > +			idle_injection_cpu_online,
> > +			idle_injection_cpu_predown);
> > +	if (hp_state < 0)
> > +		return hp_state;
> > +
> > +	do {
> > +		register_cpu_mask =
> > +			topology_core_cpumask(cpumask_first(&rest_cpu_mask));
> > +
> > +		if (cpumask_empty(register_cpu_mask))
> > +			break;
> > +
> > +		ret = cpu_idle_cooling_register(register_cpu_mask);
> > +		if (IS_ERR(ret)) {
> > +			__cpu_idle_cooling_exit();
> > +			return -ENOMEM;
> > +		}
> > +	} while (cpumask_andnot(&rest_cpu_mask,
> > +				&rest_cpu_mask,
> > +				register_cpu_mask));
> > +
> > +	return 0;
> > +}
> > +module_init(cpu_idle_cooling_init);
> > +
> > +static void __exit cpu_idle_cooling_exit(void)
> > +{
> > +	__cpu_idle_cooling_exit();
> > +}
> > +module_exit(cpu_idle_cooling_exit);
> > +
> > +MODULE_LICENSE("GPL v2");
> > +MODULE_AUTHOR("Tao Wang <kevin.wangtao@hisilicon.com>");
> > +MODULE_DESCRIPTION("CPU Idle Cooling Driver for ARM Platform");
> > -- 
> > 1.7.9.5
> 
> -- 
> viresh

-- 

 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-07 21:50   ` Daniel Lezcano
@ 2017-06-07 21:59     ` Rafael J. Wysocki
  2017-06-08  7:52       ` Daniel Lezcano
  0 siblings, 1 reply; 13+ messages in thread
From: Rafael J. Wysocki @ 2017-06-07 21:59 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Viresh Kumar, Tao Wang, Zhang, Rui, Eduardo Valentin,
	amit.kachhap, javi.merino, Linux Kernel Mailing List, Linux PM,
	sunzhaosheng, Vincent Guittot, jean.wangtao

On Wed, Jun 7, 2017 at 11:50 PM, Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
> On Tue, Jun 06, 2017 at 09:11:35AM +0530, viresh kumar wrote:
>> + Daniel
>
> Hi Viresh,
>
> thanks for the head up.
>
> Before going deeply in the review, I have a dumb question:
>
> Why isn't this mechanism implemented at the scheduler level?

I suppose for the same reason why intel_powerclamp is not implemented
like that: the scheduler maintainers don't like the idea.

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-05  9:07 [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Tao Wang
  2017-06-05  9:07 ` [PATCH RFC 2/2] thermal/cpu idle cooling: cpu idle cooling cooperate with cpu cooling Tao Wang
  2017-06-06  3:41 ` [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Viresh Kumar
@ 2017-06-08  7:19 ` Vincent Guittot
       [not found]   ` <CAMBp1jNWrosu4vaFwqy4cKs69r3-zX_b06-09eitCcfteHBG2w@mail.gmail.com>
  2 siblings, 1 reply; 13+ messages in thread
From: Vincent Guittot @ 2017-06-08  7:19 UTC (permalink / raw)
  To: Tao Wang
  Cc: rui.zhang, edubezval, Amit Kachhap, viresh kumar, javi.merino,
	linux-kernel, linux-pm, Sunzhaosheng Sun(Zhaosheng),
	Jean Wangtao, Daniel Lezcano

Hi Kevin,

On 5 June 2017 at 11:07, Tao Wang <kevin.wangtao@hisilicon.com> wrote:
> cpu idle cooling driver performs synchronized idle injection across
> all cpu in same cluster, offers a new method to cooling down cpu,
> that is similar to intel_power_clamp driver, but is basically
> designed for ARM platform.
> Each cluster has its own idle cooling device, each core has its own
> idle injection thread, idle injection thread use play_idle to enter
> idle. In order to reach deepest idle state, all cores are aligned by
> jiffies. the injected idle ratio can be controlled through cooling
> device interface.
>
> Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
> ---
>  drivers/thermal/Kconfig            |   13 +
>  drivers/thermal/Makefile           |    3 +
>  drivers/thermal/cpu_idle_cooling.c |  648 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 664 insertions(+)
>  create mode 100644 drivers/thermal/cpu_idle_cooling.c
>
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index b5b5fac..f78e85c 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -154,6 +154,19 @@ config CPU_THERMAL
>
>           If you want this support, you should say Y here.
>
> +config CPU_IDLE_THERMAL
> +       tristate "generic cpu idle cooling support"
> +       depends on CPU_FREQ

Does CPU_IDLE_THERMAL really depend on CPU_FREQ?
This dependency should instead be placed on CPU_THERMAL_COMBO in patch 2.

> +       help
> +         This implements the generic cpu cooling mechanism through idle
> +         injection.
> +
> +         This will throttle cpu by injecting specified idle time in
> +         a fixed cycle. All cpu in same cluster will enter idle synchronously
> +         to reach deepest idle state when injecting idle.
> +
> +         If you want this support, you should say Y here.
> +
>  config CLOCK_THERMAL
>         bool "Generic clock cooling support"
>         depends on COMMON_CLK
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 094d703..a4db66e 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -26,6 +26,9 @@ thermal_sys-$(CONFIG_CLOCK_THERMAL)   += clock_cooling.o
>  # devfreq cooling
>  thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
>
> +# cpu idle cooling
> +obj-$(CONFIG_CPU_IDLE_THERMAL) += cpu_idle_cooling.o
> +
>  # platform thermal drivers
>  obj-y                          += broadcom/
>  obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)     += qcom-spmi-temp-alarm.o
> diff --git a/drivers/thermal/cpu_idle_cooling.c b/drivers/thermal/cpu_idle_cooling.c
> new file mode 100644
> index 0000000..89a15c5
> --- /dev/null

[snip]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-07 21:59     ` Rafael J. Wysocki
@ 2017-06-08  7:52       ` Daniel Lezcano
  0 siblings, 0 replies; 13+ messages in thread
From: Daniel Lezcano @ 2017-06-08  7:52 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Viresh Kumar, Tao Wang, Zhang, Rui, Eduardo Valentin,
	amit.kachhap, javi.merino, Linux Kernel Mailing List, Linux PM,
	sunzhaosheng, Vincent Guittot, jean.wangtao

On Wed, Jun 07, 2017 at 11:59:12PM +0200, Rafael J. Wysocki wrote:
> On Wed, Jun 7, 2017 at 11:50 PM, Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
> > On Tue, Jun 06, 2017 at 09:11:35AM +0530, viresh kumar wrote:
> >> + Daniel
> >
> > Hi Viresh,
> >
> > thanks for the head up.
> >
> > Before going deeply in the review, I have a dumb question:
> >
> > Why isn't this mechanism implemented at the scheduler level?
> 
> I suppose for the same reason why intel_powerclamp is not implemented
> like that: the scheduler maintainers don't like the idea.

Ok, I see.

Thanks.

  -- Daniel


-- 

 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-06  3:41 ` [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Viresh Kumar
  2017-06-07 21:50   ` Daniel Lezcano
@ 2017-06-08 12:04   ` Daniel Lezcano
  2017-06-09  8:20   ` Daniel Lezcano
  2 siblings, 0 replies; 13+ messages in thread
From: Daniel Lezcano @ 2017-06-08 12:04 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Tao Wang, rui.zhang, edubezval, amit.kachhap, javi.merino,
	linux-kernel, linux-pm, sunzhaosheng, vincent.guittot,
	jean.wangtao

On Tue, Jun 06, 2017 at 09:11:35AM +0530, viresh kumar wrote:
> + Daniel
> 
> On 05-06-17, 17:07, Tao Wang wrote:
> > cpu idle cooling driver performs synchronized idle injection across
> > all cpu in same cluster, offers a new method to cooling down cpu,
> > that is similar to intel_power_clamp driver, but is basically
> > designed for ARM platform.
> > Each cluster has its own idle cooling device, each core has its own
> > idle injection thread, idle injection thread use play_idle to enter
> > idle. In order to reach deepest idle state, all cores are aligned by
> > jiffies. the injected idle ratio can be controlled through cooling
> > device interface.
> > 

I don't see any ARM-specific code in this driver, contrary to what the
description states.

Was converging intel_powerclamp and this driver into a single generic
driver considered?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
       [not found]   ` <CAMBp1jNWrosu4vaFwqy4cKs69r3-zX_b06-09eitCcfteHBG2w@mail.gmail.com>
@ 2017-06-08 13:42     ` Vincent Guittot
  0 siblings, 0 replies; 13+ messages in thread
From: Vincent Guittot @ 2017-06-08 13:42 UTC (permalink / raw)
  To: Jean Wangtao
  Cc: edubezval, javi.merino, viresh kumar, linux-kernel, Tao Wang,
	Sunzhaosheng Sun(Zhaosheng),
	linux-pm, Amit Kachhap, Daniel Lezcano, rui.zhang

On 8 June 2017 at 14:59, Jean Wangtao <jean.wangtao@linaro.org> wrote:
>
> Hi Vincent,
>
> 2017年6月8日 下午3:19,"Vincent Guittot" <vincent.guittot@linaro.org>写道:
>
> Hi Kevin,
>
> On 5 June 2017 at 11:07, Tao Wang <kevin.wangtao@hisilicon.com> wrote:
>> cpu idle cooling driver performs synchronized idle injection across
>> all cpu in same cluster, offers a new method to cooling down cpu,
>> that is similar to intel_power_clamp driver, but is basically
>> designed for ARM platform.
>> Each cluster has its own idle cooling device, each core has its own
>> idle injection thread, idle injection thread use play_idle to enter
>> idle. In order to reach deepest idle state, all cores are aligned by
>> jiffies. the injected idle ratio can be controlled through cooling
>> device interface.
>>
>> Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
>> ---
>>  drivers/thermal/Kconfig            |   13 +
>>  drivers/thermal/Makefile           |    3 +
>>  drivers/thermal/cpu_idle_cooling.c |  648
>> ++++++++++++++++++++++++++++++++++++
>>  3 files changed, 664 insertions(+)
>>  create mode 100644 drivers/thermal/cpu_idle_cooling.c
>>
>> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
>> index b5b5fac..f78e85c 100644
>> --- a/drivers/thermal/Kconfig
>> +++ b/drivers/thermal/Kconfig
>> @@ -154,6 +154,19 @@ config CPU_THERMAL
>>
>>           If you want this support, you should say Y here.
>>
>> +config CPU_IDLE_THERMAL
>> +       tristate "generic cpu idle cooling support"
>> +       depends on CPU_FREQ
>
> Does CPU_IDLE_THERMAL really depend on CPU_FREQ ?
>
>
> because I use a interface of cpufreq to get cpu idle time and current wall
> time, that cause the dependency

IMO, you would be better off using get_cpu_idle_time_us() directly and
removing the dependency on cpufreq.
Furthermore, get_cpu_idle_time() from cpufreq may count iowait as
busy time, which makes sense for cpufreq but not really for idle
injection.


>
> This dependency should be put for CPU_THERMAL_COMBO in the patch 2
>
>
> CPU_THERMAL_COMBO depend on CPU_THERMAL and CPU_IDLE_THERMAL, but the code
> of patch 2 have no direct relationship with cpufreq, so I didn't add the
> dependency

Yes, you're right, CPU_THERMAL_COMBO already depends on CPU_FREQ.

>
>
>> +       help
>> +         This implements the generic cpu cooling mechanism through idle
>> +         injection.
>> +
>> +         This will throttle cpu by injecting specified idle time in
>> +         a fixed cycle. All cpu in same cluster will enter idle
>> synchronously
>> +         to reach deepest idle state when injecting idle.
>> +
>> +         If you want this support, you should say Y here.
>> +
>>  config CLOCK_THERMAL
>>         bool "Generic clock cooling support"
>>         depends on COMMON_CLK
>> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
>> index 094d703..a4db66e 100644
>> --- a/drivers/thermal/Makefile
>> +++ b/drivers/thermal/Makefile
>> @@ -26,6 +26,9 @@ thermal_sys-$(CONFIG_CLOCK_THERMAL)   += clock_cooling.o
>>  # devfreq cooling
>>  thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
>>
>> +# cpu idle cooling
>> +obj-$(CONFIG_CPU_IDLE_THERMAL) += cpu_idle_cooling.o
>> +
>>  # platform thermal drivers
>>  obj-y                          += broadcom/
>>  obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM)     += qcom-spmi-temp-alarm.o
>> diff --git a/drivers/thermal/cpu_idle_cooling.c
>> b/drivers/thermal/cpu_idle_cooling.c
>> new file mode 100644
>> index 0000000..89a15c5
>> --- /dev/null
>
> [snip]
>
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-06  3:41 ` [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Viresh Kumar
  2017-06-07 21:50   ` Daniel Lezcano
  2017-06-08 12:04   ` Daniel Lezcano
@ 2017-06-09  8:20   ` Daniel Lezcano
       [not found]     ` <CAMBp1jM4KV5yj2=p=JBSfnw9u1D6qqtru49Rqp_8d3ePkHcE5Q@mail.gmail.com>
  2 siblings, 1 reply; 13+ messages in thread
From: Daniel Lezcano @ 2017-06-09  8:20 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Tao Wang, rui.zhang, edubezval, amit.kachhap, javi.merino,
	linux-kernel, linux-pm, sunzhaosheng, vincent.guittot,
	jean.wangtao

On Tue, Jun 06, 2017 at 09:11:35AM +0530, viresh kumar wrote:
> + Daniel
> 
> On 05-06-17, 17:07, Tao Wang wrote:
> > cpu idle cooling driver performs synchronized idle injection across
> > all cpu in same cluster, offers a new method to cooling down cpu,
> > that is similar to intel_power_clamp driver, but is basically
> > designed for ARM platform.
> > Each cluster has its own idle cooling device, each core has its own
> > idle injection thread, idle injection thread use play_idle to enter
> > idle. In order to reach deepest idle state, all cores are aligned by
> > jiffies. the injected idle ratio can be controlled through cooling
> > device interface.
> > 
> > Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>

[ ... ]

Hi Kevin,

I'm failing to understand all the cpumask logic.

Can you explain the rationale?

Thanks.

  -- Daniel


> > +struct thermal_cooling_device * __init
> > +cpu_idle_cooling_register(const struct cpumask *clip_cpus)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +	struct thermal_cooling_device *ret;
> > +	unsigned long cpu;
> > +	char dev_name[THERMAL_NAME_LENGTH];
> > +
> > +	if (cpumask_empty(clip_cpus))
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	mutex_lock(&cpu_idle_cooling_lock);
> > +	get_online_cpus();
> > +	list_for_each_entry(idle_cooling_dev,
> > +		&cpu_idle_cooling_dev_list, node) {
> > +		if (cpumask_intersects(idle_cooling_dev->related_cpus,
> > +			clip_cpus)) {
> > +			ret = ERR_PTR(-EINVAL);
> > +			goto exit_unlock;
> > +		}
> > +	}
> > +
> > +	idle_cooling_dev = kzalloc(sizeof(*idle_cooling_dev), GFP_KERNEL);
> > +	if (!idle_cooling_dev) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_unlock;
> > +	}
> > +
> > +	if (!zalloc_cpumask_var(&idle_cooling_dev->related_cpus, GFP_KERNEL)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_free_dev;
> > +	}
> > +
> > +	if (!zalloc_cpumask_var(&idle_cooling_dev->injected_cpus, GFP_KERNEL)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_free_related_cpus;
> > +	}
> > +
> > +	cpumask_copy(idle_cooling_dev->related_cpus, clip_cpus);
> > +	cpu = cpumask_first(clip_cpus);
> > +	idle_cooling_dev->control_cpu = cpu;
> > +	idle_cooling_dev->id = topology_physical_package_id(cpu);
> > +	idle_cooling_dev->window_size = DEFAULT_WINDOW_SIZE;
> > +	idle_cooling_dev->duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
> > +
> > +	if (create_idle_thread(idle_cooling_dev)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_free_injected_cpus;
> > +	}
> > +
> > +	snprintf(dev_name, sizeof(dev_name), "thermal-cpuidle-%d",
> > +		 idle_cooling_dev->id);
> > +	ret = thermal_cooling_device_register(dev_name,
> > +					idle_cooling_dev,
> > +					&cpu_idle_injection_cooling_ops);
> > +	if (IS_ERR(ret))
> > +		goto exit_stop_thread;
> > +
> > +	idle_cooling_dev->cooling_dev = ret;
> > +
> > +	if (device_create_file(&idle_cooling_dev->cooling_dev->device,
> > +		&dev_attr_duration)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_unregister_cdev;
> > +	}
> > +
> > +	if (device_create_file(&idle_cooling_dev->cooling_dev->device,
> > +		&dev_attr_window_size)) {
> > +		ret = ERR_PTR(-ENOMEM);
> > +		goto exit_remove_duration_attr;
> > +	}
> > +
> > +	list_add(&idle_cooling_dev->node, &cpu_idle_cooling_dev_list);
> > +
> > +	goto exit_unlock;
> > +
> > +exit_remove_duration_attr:
> > +	device_remove_file(&idle_cooling_dev->cooling_dev->device,
> > +			&dev_attr_duration);
> > +exit_unregister_cdev:
> > +	thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
> > +exit_stop_thread:
> > +	stop_idle_thread(idle_cooling_dev);
> > +exit_free_injected_cpus:
> > +	free_cpumask_var(idle_cooling_dev->injected_cpus);
> > +exit_free_related_cpus:
> > +	free_cpumask_var(idle_cooling_dev->related_cpus);
> > +exit_free_dev:
> > +	kfree(idle_cooling_dev);
> > +exit_unlock:
> > +	put_online_cpus();
> > +	mutex_unlock(&cpu_idle_cooling_lock);
> > +	return ret;
> > +}
> > +
> > +void cpu_idle_cooling_unregister(struct thermal_cooling_device *cdev)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	if (IS_ERR_OR_NULL(cdev))
> > +		return;
> > +
> > +	idle_cooling_dev = cdev->devdata;
> > +
> > +	mutex_lock(&cpu_idle_cooling_lock);
> > +	get_online_cpus();
> > +	list_del(&idle_cooling_dev->node);
> > +	put_online_cpus();
> > +	mutex_unlock(&cpu_idle_cooling_lock);
> > +
> > +	device_remove_file(&cdev->device, &dev_attr_window_size);
> > +	device_remove_file(&cdev->device, &dev_attr_duration);
> > +	thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
> > +
> > +	stop_idle_thread(idle_cooling_dev);
> > +	free_cpumask_var(idle_cooling_dev->injected_cpus);
> > +	free_cpumask_var(idle_cooling_dev->related_cpus);
> > +	kfree(idle_cooling_dev);
> > +}
> > +
> > +static void __cpu_idle_cooling_exit(void)
> > +{
> > +	struct cpu_idle_cooling_device *idle_cooling_dev;
> > +
> > +	while (!list_empty(&cpu_idle_cooling_dev_list)) {
> > +		idle_cooling_dev = list_first_entry(&cpu_idle_cooling_dev_list,
> > +				struct cpu_idle_cooling_device, node);
> > +		cpu_idle_cooling_unregister(idle_cooling_dev->cooling_dev);
> > +	}
> > +
> > +	if (hp_state > 0)
> > +		cpuhp_remove_state_nocalls(hp_state);
> > +}
> > +
> > +static int __init cpu_idle_cooling_init(void)
> > +{
> > +	struct thermal_cooling_device *ret;
> > +	cpumask_t rest_cpu_mask = CPU_MASK_ALL;
> > +	const struct cpumask *register_cpu_mask;
> > +
> > +	hp_state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> > +			"thermal/cpu_idle_cooling:online",
> > +			idle_injection_cpu_online,
> > +			idle_injection_cpu_predown);
> > +	if (hp_state < 0)
> > +		return hp_state;
> > +
> > +	do {
> > +		register_cpu_mask =
> > +			topology_core_cpumask(cpumask_first(&rest_cpu_mask));
> > +
> > +		if (cpumask_empty(register_cpu_mask))
> > +			break;
> > +
> > +		ret = cpu_idle_cooling_register(register_cpu_mask);
> > +		if (IS_ERR(ret)) {
> > +			__cpu_idle_cooling_exit();
> > +			return -ENOMEM;
> > +		}
> > +	} while (cpumask_andnot(&rest_cpu_mask,
> > +				&rest_cpu_mask,
> > +				register_cpu_mask));
> > +
> > +	return 0;
> > +}
> > +module_init(cpu_idle_cooling_init);
> > +
> > +static void __exit cpu_idle_cooling_exit(void)
> > +{
> > +	__cpu_idle_cooling_exit();
> > +}
> > +module_exit(cpu_idle_cooling_exit);
> > +
> > +MODULE_LICENSE("GPL v2");
> > +MODULE_AUTHOR("Tao Wang <kevin.wangtao@hisilicon.com>");
> > +MODULE_DESCRIPTION("CPU Idle Cooling Driver for ARM Platform");
> > -- 
> > 1.7.9.5
> 
> -- 
> viresh

-- 

 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
       [not found]     ` <CAMBp1jM4KV5yj2=p=JBSfnw9u1D6qqtru49Rqp_8d3ePkHcE5Q@mail.gmail.com>
@ 2017-06-14 12:55       ` Daniel Lezcano
  2017-06-14 15:39         ` Vincent Guittot
  2017-06-15 15:55         ` Jean Wangtao
  0 siblings, 2 replies; 13+ messages in thread
From: Daniel Lezcano @ 2017-06-14 12:55 UTC (permalink / raw)
  To: Jean Wangtao
  Cc: Viresh Kumar, Tao Wang, rui.zhang, Eduardo Valentin,
	Amit Kachhap, javi.merino, linux-kernel, linux-pm,
	Sunzhaosheng Sun(Zhaosheng),
	Vincent Guittot

On Sat, Jun 10, 2017 at 08:00:28PM +0200, Jean Wangtao wrote:
> On 9 June 2017 at 10:20, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> 
> > On Tue, Jun 06, 2017 at 09:11:35AM +0530, viresh kumar wrote:
> > > + Daniel
> > >
> > > On 05-06-17, 17:07, Tao Wang wrote:
> > > > cpu idle cooling driver performs synchronized idle injection across
> > > > all cpu in same cluster, offers a new method to cooling down cpu,
> > > > that is similar to intel_power_clamp driver, but is basically
> > > > designed for ARM platform.
> > > > Each cluster has its own idle cooling device, each core has its own
> > > > idle injection thread, idle injection thread use play_idle to enter
> > > > idle. In order to reach deepest idle state, all cores are aligned by
> > > > jiffies. the injected idle ratio can be controlled through cooling
> > > > device interface.
> > > >
> > > > Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
> >
> > [ ... ]
> >
> > Hi Kevin,
> >
> > I'm failing to understand all the cpumask logic.
> >
> > Can you explain the rational?
> >
> > Thanks.
> >
> 
> This driver register cooling device for each cluster, so in module_init, we
> init rest_cpu_mask as all cpu, then pick first cpu of the rest_cpu_mask,
> set register_cpu_mask as all cpus in the cluster which contains the picked
> cpu, and register idle cooling device, clear the register_cpu_mask out of
> rest_cpu_mask after the register. repeat the process above until there are
> no cpu left.
> 
> In cpu_idle_cooling_register, we will check whether the input cpumask is
> empty or have intersection with registered devices(that seems to be
> unnecessary now, because the input is under control), the related_cpus is
> the copy of input cpumask, the injected_cpus is the same as related_cpus if
> there are no error happened during create_idle_thread(there is bug here if
> create_idle_thread return error we should call stop_idle_thread to destroy
> the idle injection thread already created).
> 
> In create_idle_thread, we create an idle injection thread for each cpu in
> related_cpus; if that succeeds, we set the cpu in injected_cpus.
> In stop_idle_thread, we destroy the idle injection thread of each cpu in
> injected_cpus, which marks the threads we have created.
> 
> In get_cpu_idle_injection_dev, we go through all the registered devices to
> find one which contain the input cpu.
> In idle_injection_cpu_online, when a cpu plug in, if this cpu is the first
> cpu of the cluster or current control cpu of the cooling device is offline,
> set this cpu as the control cpu.
> In idle_injection_cpu_predown, when a cpu plug off, we set the first online
> cpu of injected_cpus as the control cpu.
> 
> In set_idle_state, the input cpu mask may cover several cooling devices, so
> we go through all the registered devices; each device whose related_cpus is
> a subset of the input cpumask will execute idle injection with the input
> ratio.
> 
> I hope I have explained it clearly.
> 

Well, you explained what it does, but I was expecting the why.

Is there any document describing your code or its logic in detail?

For example, why would the following pseudo-code be wrong in place of the
cpumask dance?

[pseudo code]

cpumask_t cluster_id;

cpumask_clear(cluster_id);

for_each_cpu_possible(cpu) {

        if (cpumask_test_cpu(topology_physical_package_id(cpu),
                                &cluster_id))
                continue;

        th_cool_dev = cpu_idle_cooling_register(cpumask_of(cpu));
        if (IS_ERR(th_cool_dev))
                goto rollback;

        cpumask_set(topology_physical_package_id(cpu), &cluster_id);
}


> > > > +struct thermal_cooling_device * __init
> > > > +cpu_idle_cooling_register(const struct cpumask *clip_cpus)
> > > > +{
> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
> > > > +   struct thermal_cooling_device *ret;
> > > > +   unsigned long cpu;
> > > > +   char dev_name[THERMAL_NAME_LENGTH];
> > > > +
> > > > +   if (cpumask_empty(clip_cpus))
> > > > +           return ERR_PTR(-ENOMEM);
> > > > +
> > > > +   mutex_lock(&cpu_idle_cooling_lock);
> > > > +   get_online_cpus();
> > > > +   list_for_each_entry(idle_cooling_dev,
> > > > +           &cpu_idle_cooling_dev_list, node) {
> > > > +           if (cpumask_intersects(idle_cooling_dev->related_cpus,
> > > > +                   clip_cpus)) {
> > > > +                   ret = ERR_PTR(-EINVAL);
> > > > +                   goto exit_unlock;
> > > > +           }
> > > > +   }
> > > > +
> > > > +   idle_cooling_dev = kzalloc(sizeof(*idle_cooling_dev), GFP_KERNEL);
> > > > +   if (!idle_cooling_dev) {
> > > > +           ret = ERR_PTR(-ENOMEM);
> > > > +           goto exit_unlock;
> > > > +   }
> > > > +
> > > > +   if (!zalloc_cpumask_var(&idle_cooling_dev->related_cpus,
> > GFP_KERNEL)) {
> > > > +           ret = ERR_PTR(-ENOMEM);
> > > > +           goto exit_free_dev;
> > > > +   }
> > > > +
> > > > +   if (!zalloc_cpumask_var(&idle_cooling_dev->injected_cpus,
> > GFP_KERNEL)) {
> > > > +           ret = ERR_PTR(-ENOMEM);
> > > > +           goto exit_free_related_cpus;
> > > > +   }
> > > > +
> > > > +   cpumask_copy(idle_cooling_dev->related_cpus, clip_cpus);
> > > > +   cpu = cpumask_first(clip_cpus);
> > > > +   idle_cooling_dev->control_cpu = cpu;
> > > > +   idle_cooling_dev->id = topology_physical_package_id(cpu);
> > > > +   idle_cooling_dev->window_size = DEFAULT_WINDOW_SIZE;
> > > > +   idle_cooling_dev->duration = jiffies_to_msecs(DEFAULT_
> > DURATION_JIFFIES);
> > > > +
> > > > +   if (create_idle_thread(idle_cooling_dev)) {
> > > > +           ret = ERR_PTR(-ENOMEM);
> > > > +           goto exit_free_injected_cpus;
> > > > +   }
> > > > +
> > > > +   snprintf(dev_name, sizeof(dev_name), "thermal-cpuidle-%d",
> > > > +            idle_cooling_dev->id);
> > > > +   ret = thermal_cooling_device_register(dev_name,
> > > > +                                   idle_cooling_dev,
> > > > +                                   &cpu_idle_injection_cooling_ops);
> > > > +   if (IS_ERR(ret))
> > > > +           goto exit_stop_thread;
> > > > +
> > > > +   idle_cooling_dev->cooling_dev = ret;
> > > > +
> > > > +   if (device_create_file(&idle_cooling_dev->cooling_dev->device,
> > > > +           &dev_attr_duration)) {
> > > > +           ret = ERR_PTR(-ENOMEM);
> > > > +           goto exit_unregister_cdev;
> > > > +   }
> > > > +
> > > > +   if (device_create_file(&idle_cooling_dev->cooling_dev->device,
> > > > +           &dev_attr_window_size)) {
> > > > +           ret = ERR_PTR(-ENOMEM);
> > > > +           goto exit_remove_duration_attr;
> > > > +   }
> > > > +
> > > > +   list_add(&idle_cooling_dev->node, &cpu_idle_cooling_dev_list);
> > > > +
> > > > +   goto exit_unlock;
> > > > +
> > > > +exit_remove_duration_attr:
> > > > +   device_remove_file(&idle_cooling_dev->cooling_dev->device,
> > > > +                   &dev_attr_duration);
> > > > +exit_unregister_cdev:
> > > > +   thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
> > > > +exit_stop_thread:
> > > > +   stop_idle_thread(idle_cooling_dev);
> > > > +exit_free_injected_cpus:
> > > > +   free_cpumask_var(idle_cooling_dev->injected_cpus);
> > > > +exit_free_related_cpus:
> > > > +   free_cpumask_var(idle_cooling_dev->related_cpus);
> > > > +exit_free_dev:
> > > > +   kfree(idle_cooling_dev);
> > > > +exit_unlock:
> > > > +   put_online_cpus();
> > > > +   mutex_unlock(&cpu_idle_cooling_lock);
> > > > +   return ret;
> > > > +}
> > > > +
> > > > +void cpu_idle_cooling_unregister(struct thermal_cooling_device *cdev)
> > > > +{
> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
> > > > +
> > > > +   if (IS_ERR_OR_NULL(cdev))
> > > > +           return;
> > > > +
> > > > +   idle_cooling_dev = cdev->devdata;
> > > > +
> > > > +   mutex_lock(&cpu_idle_cooling_lock);
> > > > +   get_online_cpus();
> > > > +   list_del(&idle_cooling_dev->node);
> > > > +   put_online_cpus();
> > > > +   mutex_unlock(&cpu_idle_cooling_lock);
> > > > +
> > > > +   device_remove_file(&cdev->device, &dev_attr_window_size);
> > > > +   device_remove_file(&cdev->device, &dev_attr_duration);
> > > > +   thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
> > > > +
> > > > +   stop_idle_thread(idle_cooling_dev);
> > > > +   free_cpumask_var(idle_cooling_dev->injected_cpus);
> > > > +   free_cpumask_var(idle_cooling_dev->related_cpus);
> > > > +   kfree(idle_cooling_dev);
> > > > +}
> > > > +
> > > > +static void __cpu_idle_cooling_exit(void)
> > > > +{
> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
> > > > +
> > > > +   while (!list_empty(&cpu_idle_cooling_dev_list)) {
> > > > +           idle_cooling_dev = list_first_entry(&cpu_idle_
> > cooling_dev_list,
> > > > +                           struct cpu_idle_cooling_device, node);
> > > > +           cpu_idle_cooling_unregister(idle_cooling_dev->cooling_dev)
> > ;
> > > > +   }
> > > > +
> > > > +   if (hp_state > 0)
> > > > +           cpuhp_remove_state_nocalls(hp_state);
> > > > +}
> > > > +
> > > > +static int __init cpu_idle_cooling_init(void)
> > > > +{
> > > > +   struct thermal_cooling_device *ret;
> > > > +   cpumask_t rest_cpu_mask = CPU_MASK_ALL;
> > > > +   const struct cpumask *register_cpu_mask;
> > > > +
> > > > +   hp_state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> > > > +                   "thermal/cpu_idle_cooling:online",
> > > > +                   idle_injection_cpu_online,
> > > > +                   idle_injection_cpu_predown);
> > > > +   if (hp_state < 0)
> > > > +           return hp_state;
> > > > +
> > > > +   do {
> > > > +           register_cpu_mask =
> > > > +                   topology_core_cpumask(cpumask_
> > first(&rest_cpu_mask));
> > > > +
> > > > +           if (cpumask_empty(register_cpu_mask))
> > > > +                   break;
> > > > +
> > > > +           ret = cpu_idle_cooling_register(register_cpu_mask);
> > > > +           if (IS_ERR(ret)) {
> > > > +                   __cpu_idle_cooling_exit();
> > > > +                   return -ENOMEM;
> > > > +           }
> > > > +   } while (cpumask_andnot(&rest_cpu_mask,
> > > > +                           &rest_cpu_mask,
> > > > +                           register_cpu_mask));
> > > > +
> > > > +   return 0;
> > > > +}
> > > > +module_init(cpu_idle_cooling_init);
> > > > +
> > > > +static void __exit cpu_idle_cooling_exit(void)
> > > > +{
> > > > +   __cpu_idle_cooling_exit();
> > > > +}
> > > > +module_exit(cpu_idle_cooling_exit);
> > > > +
> > > > +MODULE_LICENSE("GPL v2");
> > > > +MODULE_AUTHOR("Tao Wang <kevin.wangtao@hisilicon.com>");
> > > > +MODULE_DESCRIPTION("CPU Idle Cooling Driver for ARM Platform");
> > > > --
> > > > 1.7.9.5
> > >
> > > --
> > > viresh
> >
> > --
> >
> >  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
> >
> > Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> > <http://twitter.com/#!/linaroorg> Twitter |
> > <http://www.linaro.org/linaro-blog/> Blog
> >

-- 

 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-14 12:55       ` Daniel Lezcano
@ 2017-06-14 15:39         ` Vincent Guittot
  2017-06-15 15:55         ` Jean Wangtao
  1 sibling, 0 replies; 13+ messages in thread
From: Vincent Guittot @ 2017-06-14 15:39 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Jean Wangtao, Viresh Kumar, Tao Wang, rui.zhang,
	Eduardo Valentin, Amit Kachhap, javi.merino, linux-kernel,
	linux-pm, Sunzhaosheng Sun(Zhaosheng)

On 14 June 2017 at 14:55, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> On Sat, Jun 10, 2017 at 08:00:28PM +0200, Jean Wangtao wrote:
>> On 9 June 2017 at 10:20, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> > On Tue, Jun 06, 2017 at 09:11:35AM +0530, viresh kumar wrote:
>> > > + Daniel
>> > >
>> > > On 05-06-17, 17:07, Tao Wang wrote:
>> > > > cpu idle cooling driver performs synchronized idle injection across
>> > > > all cpu in same cluster, offers a new method to cooling down cpu,
>> > > > that is similar to intel_power_clamp driver, but is basically
>> > > > designed for ARM platform.
>> > > > Each cluster has its own idle cooling device, each core has its own
>> > > > idle injection thread, idle injection thread use play_idle to enter
>> > > > idle. In order to reach deepest idle state, all cores are aligned by
>> > > > jiffies. the injected idle ratio can be controlled through cooling
>> > > > device interface.
>> > > >
>> > > > Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
>> >
>> > [ ... ]
>> >
>> > Hi Kevin,
>> >
>> > I'm failing to understand all the cpumask logic.
>> >
>> > Can you explain the rational?
>> >
>> > Thanks.
>> >
>>
>> This driver register cooling device for each cluster, so in module_init, we
>> init rest_cpu_mask as all cpu, then pick first cpu of the rest_cpu_mask,
>> set register_cpu_mask as all cpus in the cluster which contains the picked
>> cpu, and register idle cooling device, clear the register_cpu_mask out of
>> rest_cpu_mask after the register. repeat the process above until there are
>> no cpu left.
>>
>> In cpu_idle_cooling_register, we will check whether the input cpumask is
>> empty or have intersection with registered devices(that seems to be
>> unnecessary now, because the input is under control), the related_cpus is
>> the copy of input cpumask, the injected_cpus is the same as related_cpus if
>> there are no error happened during create_idle_thread(there is bug here if
>> create_idle_thread return error we should call stop_idle_thread to destroy
>> the idle injection thread already created).
>>
>> In create_idle_thread, we create idle injection thread for each cpu in
>> related_cpus, if operation success, set cpumask in injected_cpus.
>> In stop_idle_thread, we destory idle injection thread for each cpu in
>> injected_cpus which marks the threads we have created.
>>
>> In get_cpu_idle_injection_dev, we go through all the registered devices to
>> find one which contain the input cpu.
>> In idle_injection_cpu_online, when a cpu plug in, if this cpu is the first
>> cpu of the cluster or current control cpu of the cooling device is offline,
>> set this cpu as the control cpu.
>> In idle_injection_cpu_predown, when a cpu plug off, we set the first online
>> cpu of injected_cpus as the control cpu.
>>
>> In set_idle_state, the input cpu mask may cover several cooling device, so
>> we go through all the registered devices, each device who's related_cpus is
>> the subset of the input cpumask will execute idle injection with the input
>> ratio.
>>
>> I wish I have explained it clearly.
>>
>
> Well, you explained what it does but I was expecting the why.

Is your question about why we create one cooling device per cluster?

To maximize the power reduction, it's worth putting all CPUs of the
same cluster into idle simultaneously, so that the cluster itself can
be powered down as well, which removes more static power.
On an HMP system, the power budget of each cluster is different (big
cores vs LITTLE cores). Injecting idle on the LITTLE cluster doesn't
have much impact compared to doing so on the big cluster.

>
> Is there any document describing in details your code or the logic?
>
> For example why the following pseudo-code would it be wrong in place of the
> cpumask dance?

I'm not sure I understand what you mean here.

>
> [pseudo code]
>
> cpumask_t cluster_id;
>
> cpumask_clear(cluster_id);
>
> for_each_cpu_possible(cpu) {
>
>         if (cpumask_test_cpu(topology_physical_package_id(cpu]),
>                                 &cluster_id))
>                 continue;
>
>         th_cool_dev = cpu_idle_cooling_register(cpumask_of(cpu));
>         if (IS_ERR(th_cool_dev)
>                 goto rollback;
>
>         cpumask_set(topology_physical_package_id(cpu], &cluster_id);
> }
>
>
>> > > > +struct thermal_cooling_device * __init
>> > > > +cpu_idle_cooling_register(const struct cpumask *clip_cpus)
>> > > > +{
>> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
>> > > > +   struct thermal_cooling_device *ret;
>> > > > +   unsigned long cpu;
>> > > > +   char dev_name[THERMAL_NAME_LENGTH];
>> > > > +
>> > > > +   if (cpumask_empty(clip_cpus))
>> > > > +           return ERR_PTR(-ENOMEM);
>> > > > +
>> > > > +   mutex_lock(&cpu_idle_cooling_lock);
>> > > > +   get_online_cpus();
>> > > > +   list_for_each_entry(idle_cooling_dev,
>> > > > +           &cpu_idle_cooling_dev_list, node) {
>> > > > +           if (cpumask_intersects(idle_cooling_dev->related_cpus,
>> > > > +                   clip_cpus)) {
>> > > > +                   ret = ERR_PTR(-EINVAL);
>> > > > +                   goto exit_unlock;
>> > > > +           }
>> > > > +   }
>> > > > +
>> > > > +   idle_cooling_dev = kzalloc(sizeof(*idle_cooling_dev), GFP_KERNEL);
>> > > > +   if (!idle_cooling_dev) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_unlock;
>> > > > +   }
>> > > > +
>> > > > +   if (!zalloc_cpumask_var(&idle_cooling_dev->related_cpus,
>> > GFP_KERNEL)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_free_dev;
>> > > > +   }
>> > > > +
>> > > > +   if (!zalloc_cpumask_var(&idle_cooling_dev->injected_cpus,
>> > GFP_KERNEL)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_free_related_cpus;
>> > > > +   }
>> > > > +
>> > > > +   cpumask_copy(idle_cooling_dev->related_cpus, clip_cpus);
>> > > > +   cpu = cpumask_first(clip_cpus);
>> > > > +   idle_cooling_dev->control_cpu = cpu;
>> > > > +   idle_cooling_dev->id = topology_physical_package_id(cpu);
>> > > > +   idle_cooling_dev->window_size = DEFAULT_WINDOW_SIZE;
>> > > > +   idle_cooling_dev->duration = jiffies_to_msecs(DEFAULT_
>> > DURATION_JIFFIES);
>> > > > +
>> > > > +   if (create_idle_thread(idle_cooling_dev)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_free_injected_cpus;
>> > > > +   }
>> > > > +
>> > > > +   snprintf(dev_name, sizeof(dev_name), "thermal-cpuidle-%d",
>> > > > +            idle_cooling_dev->id);
>> > > > +   ret = thermal_cooling_device_register(dev_name,
>> > > > +                                   idle_cooling_dev,
>> > > > +                                   &cpu_idle_injection_cooling_ops);
>> > > > +   if (IS_ERR(ret))
>> > > > +           goto exit_stop_thread;
>> > > > +
>> > > > +   idle_cooling_dev->cooling_dev = ret;
>> > > > +
>> > > > +   if (device_create_file(&idle_cooling_dev->cooling_dev->device,
>> > > > +           &dev_attr_duration)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_unregister_cdev;
>> > > > +   }
>> > > > +
>> > > > +   if (device_create_file(&idle_cooling_dev->cooling_dev->device,
>> > > > +           &dev_attr_window_size)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_remove_duration_attr;
>> > > > +   }
>> > > > +
>> > > > +   list_add(&idle_cooling_dev->node, &cpu_idle_cooling_dev_list);
>> > > > +
>> > > > +   goto exit_unlock;
>> > > > +
>> > > > +exit_remove_duration_attr:
>> > > > +   device_remove_file(&idle_cooling_dev->cooling_dev->device,
>> > > > +                   &dev_attr_duration);
>> > > > +exit_unregister_cdev:
>> > > > +   thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
>> > > > +exit_stop_thread:
>> > > > +   stop_idle_thread(idle_cooling_dev);
>> > > > +exit_free_injected_cpus:
>> > > > +   free_cpumask_var(idle_cooling_dev->injected_cpus);
>> > > > +exit_free_related_cpus:
>> > > > +   free_cpumask_var(idle_cooling_dev->related_cpus);
>> > > > +exit_free_dev:
>> > > > +   kfree(idle_cooling_dev);
>> > > > +exit_unlock:
>> > > > +   put_online_cpus();
>> > > > +   mutex_unlock(&cpu_idle_cooling_lock);
>> > > > +   return ret;
>> > > > +}
>> > > > +
>> > > > +void cpu_idle_cooling_unregister(struct thermal_cooling_device *cdev)
>> > > > +{
>> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
>> > > > +
>> > > > +   if (IS_ERR_OR_NULL(cdev))
>> > > > +           return;
>> > > > +
>> > > > +   idle_cooling_dev = cdev->devdata;
>> > > > +
>> > > > +   mutex_lock(&cpu_idle_cooling_lock);
>> > > > +   get_online_cpus();
>> > > > +   list_del(&idle_cooling_dev->node);
>> > > > +   put_online_cpus();
>> > > > +   mutex_unlock(&cpu_idle_cooling_lock);
>> > > > +
>> > > > +   device_remove_file(&cdev->device, &dev_attr_window_size);
>> > > > +   device_remove_file(&cdev->device, &dev_attr_duration);
>> > > > +   thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
>> > > > +
>> > > > +   stop_idle_thread(idle_cooling_dev);
>> > > > +   free_cpumask_var(idle_cooling_dev->injected_cpus);
>> > > > +   free_cpumask_var(idle_cooling_dev->related_cpus);
>> > > > +   kfree(idle_cooling_dev);
>> > > > +}
>> > > > +
>> > > > +static void __cpu_idle_cooling_exit(void)
>> > > > +{
>> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
>> > > > +
>> > > > +   while (!list_empty(&cpu_idle_cooling_dev_list)) {
>> > > > +           idle_cooling_dev = list_first_entry(&cpu_idle_
>> > cooling_dev_list,
>> > > > +                           struct cpu_idle_cooling_device, node);
>> > > > +           cpu_idle_cooling_unregister(idle_cooling_dev->cooling_dev)
>> > ;
>> > > > +   }
>> > > > +
>> > > > +   if (hp_state > 0)
>> > > > +           cpuhp_remove_state_nocalls(hp_state);
>> > > > +}
>> > > > +
>> > > > +static int __init cpu_idle_cooling_init(void)
>> > > > +{
>> > > > +   struct thermal_cooling_device *ret;
>> > > > +   cpumask_t rest_cpu_mask = CPU_MASK_ALL;
>> > > > +   const struct cpumask *register_cpu_mask;
>> > > > +
>> > > > +   hp_state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
>> > > > +                   "thermal/cpu_idle_cooling:online",
>> > > > +                   idle_injection_cpu_online,
>> > > > +                   idle_injection_cpu_predown);
>> > > > +   if (hp_state < 0)
>> > > > +           return hp_state;
>> > > > +
>> > > > +   do {
>> > > > +           register_cpu_mask =
>> > > > +                   topology_core_cpumask(cpumask_
>> > first(&rest_cpu_mask));
>> > > > +
>> > > > +           if (cpumask_empty(register_cpu_mask))
>> > > > +                   break;
>> > > > +
>> > > > +           ret = cpu_idle_cooling_register(register_cpu_mask);
>> > > > +           if (IS_ERR(ret)) {
>> > > > +                   __cpu_idle_cooling_exit();
>> > > > +                   return -ENOMEM;
>> > > > +           }
>> > > > +   } while (cpumask_andnot(&rest_cpu_mask,
>> > > > +                           &rest_cpu_mask,
>> > > > +                           register_cpu_mask));
>> > > > +
>> > > > +   return 0;
>> > > > +}
>> > > > +module_init(cpu_idle_cooling_init);
>> > > > +
>> > > > +static void __exit cpu_idle_cooling_exit(void)
>> > > > +{
>> > > > +   __cpu_idle_cooling_exit();
>> > > > +}
>> > > > +module_exit(cpu_idle_cooling_exit);
>> > > > +
>> > > > +MODULE_LICENSE("GPL v2");
>> > > > +MODULE_AUTHOR("Tao Wang <kevin.wangtao@hisilicon.com>");
>> > > > +MODULE_DESCRIPTION("CPU Idle Cooling Driver for ARM Platform");
>> > > > --
>> > > > 1.7.9.5
>> > >
>> > > --
>> > > viresh
>> >
>> > --
>> >
>> >  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>> >
>> > Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
>> > <http://twitter.com/#!/linaroorg> Twitter |
>> > <http://www.linaro.org/linaro-blog/> Blog
>> >
>
> --
>
>  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver
  2017-06-14 12:55       ` Daniel Lezcano
  2017-06-14 15:39         ` Vincent Guittot
@ 2017-06-15 15:55         ` Jean Wangtao
  1 sibling, 0 replies; 13+ messages in thread
From: Jean Wangtao @ 2017-06-15 15:55 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: Viresh Kumar, Tao Wang, rui.zhang, Eduardo Valentin,
	Amit Kachhap, javi.merino, linux-kernel, linux-pm,
	Sunzhaosheng Sun(Zhaosheng),
	Vincent Guittot

On 14 June 2017 at 13:55, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> On Sat, Jun 10, 2017 at 08:00:28PM +0200, Jean Wangtao wrote:
>> On 9 June 2017 at 10:20, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>
>> > On Tue, Jun 06, 2017 at 09:11:35AM +0530, viresh kumar wrote:
>> > > + Daniel
>> > >
>> > > On 05-06-17, 17:07, Tao Wang wrote:
>> > > > cpu idle cooling driver performs synchronized idle injection across
>> > > > all cpu in same cluster, offers a new method to cooling down cpu,
>> > > > that is similar to intel_power_clamp driver, but is basically
>> > > > designed for ARM platform.
>> > > > Each cluster has its own idle cooling device, each core has its own
>> > > > idle injection thread, idle injection thread use play_idle to enter
>> > > > idle. In order to reach deepest idle state, all cores are aligned by
>> > > > jiffies. the injected idle ratio can be controlled through cooling
>> > > > device interface.
>> > > >
>> > > > Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
>> >
>> > [ ... ]
>> >
>> > Hi Kevin,
>> >
>> > I'm failing to understand all the cpumask logic.
>> >
>> > Can you explain the rationale?
>> >
>> > Thanks.
>> >
>>
>> This driver registers a cooling device for each cluster, so in module_init we
>> init rest_cpu_mask to all cpus, then pick the first cpu of rest_cpu_mask,
>> set register_cpu_mask to all cpus in the cluster which contains the picked
>> cpu, register an idle cooling device, and clear register_cpu_mask out of
>> rest_cpu_mask after the registration. We repeat the process above until
>> there are no cpus left.
>>
>> In cpu_idle_cooling_register, we check whether the input cpumask is
>> empty or intersects with already registered devices (that seems to be
>> unnecessary now, because the input is under control). related_cpus is
>> a copy of the input cpumask; injected_cpus is the same as related_cpus if
>> no error happened during create_idle_thread (there is a bug here: if
>> create_idle_thread returns an error, we should call stop_idle_thread to
>> destroy the idle injection threads already created).
>>
>> In create_idle_thread, we create an idle injection thread for each cpu in
>> related_cpus; if the operation succeeds, we set that cpu in injected_cpus.
>> In stop_idle_thread, we destroy the idle injection thread for each cpu in
>> injected_cpus, which marks the threads we have created.
>>
>> In get_cpu_idle_injection_dev, we go through all the registered devices to
>> find the one which contains the input cpu.
>> In idle_injection_cpu_online, when a cpu is plugged in, if this cpu is the
>> first cpu of the cluster or the current control cpu of the cooling device is
>> offline, we set this cpu as the control cpu.
>> In idle_injection_cpu_predown, when a cpu is unplugged, we set the first
>> online cpu of injected_cpus as the control cpu.
>>
>> In set_idle_state, the input cpu mask may cover several cooling devices, so
>> we go through all the registered devices; each device whose related_cpus is
>> a subset of the input cpumask will execute idle injection with the input
>> ratio.
>>
>> I hope I have explained it clearly.
>>
>
> Well, you explained what it does but I was expecting the why.
>
> Is there any document describing your code or the logic in detail?

Sorry, currently there is no documentation for the driver.

>
> For example, why would the following pseudo-code be wrong in place of the
> cpumask dance?
The pseudo-code also registers one cooling device per cluster, but each
cooling device contains only one cpu, not all the cpus in the cluster.
I am not sure whether I have understood your question.
>
> [pseudo code]
>
> cpumask_t cluster_id;
>
> cpumask_clear(cluster_id);
>
> for_each_cpu_possible(cpu) {
>
>         if (cpumask_test_cpu(topology_physical_package_id(cpu]),
>                                 &cluster_id))
>                 continue;
>
>         th_cool_dev = cpu_idle_cooling_register(cpumask_of(cpu));
>         if (IS_ERR(th_cool_dev)
>                 goto rollback;
>
>         cpumask_set(topology_physical_package_id(cpu], &cluster_id);
> }
>
>
>> > > > +struct thermal_cooling_device * __init
>> > > > +cpu_idle_cooling_register(const struct cpumask *clip_cpus)
>> > > > +{
>> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
>> > > > +   struct thermal_cooling_device *ret;
>> > > > +   unsigned long cpu;
>> > > > +   char dev_name[THERMAL_NAME_LENGTH];
>> > > > +
>> > > > +   if (cpumask_empty(clip_cpus))
>> > > > +           return ERR_PTR(-ENOMEM);
>> > > > +
>> > > > +   mutex_lock(&cpu_idle_cooling_lock);
>> > > > +   get_online_cpus();
>> > > > +   list_for_each_entry(idle_cooling_dev,
>> > > > +           &cpu_idle_cooling_dev_list, node) {
>> > > > +           if (cpumask_intersects(idle_cooling_dev->related_cpus,
>> > > > +                   clip_cpus)) {
>> > > > +                   ret = ERR_PTR(-EINVAL);
>> > > > +                   goto exit_unlock;
>> > > > +           }
>> > > > +   }
>> > > > +
>> > > > +   idle_cooling_dev = kzalloc(sizeof(*idle_cooling_dev), GFP_KERNEL);
>> > > > +   if (!idle_cooling_dev) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_unlock;
>> > > > +   }
>> > > > +
>> > > > +   if (!zalloc_cpumask_var(&idle_cooling_dev->related_cpus,
>> > GFP_KERNEL)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_free_dev;
>> > > > +   }
>> > > > +
>> > > > +   if (!zalloc_cpumask_var(&idle_cooling_dev->injected_cpus,
>> > GFP_KERNEL)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_free_related_cpus;
>> > > > +   }
>> > > > +
>> > > > +   cpumask_copy(idle_cooling_dev->related_cpus, clip_cpus);
>> > > > +   cpu = cpumask_first(clip_cpus);
>> > > > +   idle_cooling_dev->control_cpu = cpu;
>> > > > +   idle_cooling_dev->id = topology_physical_package_id(cpu);
>> > > > +   idle_cooling_dev->window_size = DEFAULT_WINDOW_SIZE;
>> > > > +   idle_cooling_dev->duration = jiffies_to_msecs(DEFAULT_
>> > DURATION_JIFFIES);
>> > > > +
>> > > > +   if (create_idle_thread(idle_cooling_dev)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_free_injected_cpus;
>> > > > +   }
>> > > > +
>> > > > +   snprintf(dev_name, sizeof(dev_name), "thermal-cpuidle-%d",
>> > > > +            idle_cooling_dev->id);
>> > > > +   ret = thermal_cooling_device_register(dev_name,
>> > > > +                                   idle_cooling_dev,
>> > > > +                                   &cpu_idle_injection_cooling_ops);
>> > > > +   if (IS_ERR(ret))
>> > > > +           goto exit_stop_thread;
>> > > > +
>> > > > +   idle_cooling_dev->cooling_dev = ret;
>> > > > +
>> > > > +   if (device_create_file(&idle_cooling_dev->cooling_dev->device,
>> > > > +           &dev_attr_duration)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_unregister_cdev;
>> > > > +   }
>> > > > +
>> > > > +   if (device_create_file(&idle_cooling_dev->cooling_dev->device,
>> > > > +           &dev_attr_window_size)) {
>> > > > +           ret = ERR_PTR(-ENOMEM);
>> > > > +           goto exit_remove_duration_attr;
>> > > > +   }
>> > > > +
>> > > > +   list_add(&idle_cooling_dev->node, &cpu_idle_cooling_dev_list);
>> > > > +
>> > > > +   goto exit_unlock;
>> > > > +
>> > > > +exit_remove_duration_attr:
>> > > > +   device_remove_file(&idle_cooling_dev->cooling_dev->device,
>> > > > +                   &dev_attr_duration);
>> > > > +exit_unregister_cdev:
>> > > > +   thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
>> > > > +exit_stop_thread:
>> > > > +   stop_idle_thread(idle_cooling_dev);
>> > > > +exit_free_injected_cpus:
>> > > > +   free_cpumask_var(idle_cooling_dev->injected_cpus);
>> > > > +exit_free_related_cpus:
>> > > > +   free_cpumask_var(idle_cooling_dev->related_cpus);
>> > > > +exit_free_dev:
>> > > > +   kfree(idle_cooling_dev);
>> > > > +exit_unlock:
>> > > > +   put_online_cpus();
>> > > > +   mutex_unlock(&cpu_idle_cooling_lock);
>> > > > +   return ret;
>> > > > +}
>> > > > +
>> > > > +void cpu_idle_cooling_unregister(struct thermal_cooling_device *cdev)
>> > > > +{
>> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
>> > > > +
>> > > > +   if (IS_ERR_OR_NULL(cdev))
>> > > > +           return;
>> > > > +
>> > > > +   idle_cooling_dev = cdev->devdata;
>> > > > +
>> > > > +   mutex_lock(&cpu_idle_cooling_lock);
>> > > > +   get_online_cpus();
>> > > > +   list_del(&idle_cooling_dev->node);
>> > > > +   put_online_cpus();
>> > > > +   mutex_unlock(&cpu_idle_cooling_lock);
>> > > > +
>> > > > +   device_remove_file(&cdev->device, &dev_attr_window_size);
>> > > > +   device_remove_file(&cdev->device, &dev_attr_duration);
>> > > > +   thermal_cooling_device_unregister(idle_cooling_dev->cooling_dev);
>> > > > +
>> > > > +   stop_idle_thread(idle_cooling_dev);
>> > > > +   free_cpumask_var(idle_cooling_dev->injected_cpus);
>> > > > +   free_cpumask_var(idle_cooling_dev->related_cpus);
>> > > > +   kfree(idle_cooling_dev);
>> > > > +}
>> > > > +
>> > > > +static void __cpu_idle_cooling_exit(void)
>> > > > +{
>> > > > +   struct cpu_idle_cooling_device *idle_cooling_dev;
>> > > > +
>> > > > +   while (!list_empty(&cpu_idle_cooling_dev_list)) {
>> > > > +           idle_cooling_dev = list_first_entry(&cpu_idle_
>> > cooling_dev_list,
>> > > > +                           struct cpu_idle_cooling_device, node);
>> > > > +           cpu_idle_cooling_unregister(idle_cooling_dev->cooling_dev)
>> > ;
>> > > > +   }
>> > > > +
>> > > > +   if (hp_state > 0)
>> > > > +           cpuhp_remove_state_nocalls(hp_state);
>> > > > +}
>> > > > +
>> > > > +static int __init cpu_idle_cooling_init(void)
>> > > > +{
>> > > > +   struct thermal_cooling_device *ret;
>> > > > +   cpumask_t rest_cpu_mask = CPU_MASK_ALL;
>> > > > +   const struct cpumask *register_cpu_mask;
>> > > > +
>> > > > +   hp_state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
>> > > > +                   "thermal/cpu_idle_cooling:online",
>> > > > +                   idle_injection_cpu_online,
>> > > > +                   idle_injection_cpu_predown);
>> > > > +   if (hp_state < 0)
>> > > > +           return hp_state;
>> > > > +
>> > > > +   do {
>> > > > +           register_cpu_mask =
>> > > > +                   topology_core_cpumask(cpumask_
>> > first(&rest_cpu_mask));
>> > > > +
>> > > > +           if (cpumask_empty(register_cpu_mask))
>> > > > +                   break;
>> > > > +
>> > > > +           ret = cpu_idle_cooling_register(register_cpu_mask);
>> > > > +           if (IS_ERR(ret)) {
>> > > > +                   __cpu_idle_cooling_exit();
>> > > > +                   return -ENOMEM;
>> > > > +           }
>> > > > +   } while (cpumask_andnot(&rest_cpu_mask,
>> > > > +                           &rest_cpu_mask,
>> > > > +                           register_cpu_mask));
>> > > > +
>> > > > +   return 0;
>> > > > +}
>> > > > +module_init(cpu_idle_cooling_init);
>> > > > +
>> > > > +static void __exit cpu_idle_cooling_exit(void)
>> > > > +{
>> > > > +   __cpu_idle_cooling_exit();
>> > > > +}
>> > > > +module_exit(cpu_idle_cooling_exit);
>> > > > +
>> > > > +MODULE_LICENSE("GPL v2");
>> > > > +MODULE_AUTHOR("Tao Wang <kevin.wangtao@hisilicon.com>");
>> > > > +MODULE_DESCRIPTION("CPU Idle Cooling Driver for ARM Platform");
>> > > > --
>> > > > 1.7.9.5
>> > >
>> > > --
>> > > viresh
>> >
>> > --
>> >
>> >  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>> >
>> > Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
>> > <http://twitter.com/#!/linaroorg> Twitter |
>> > <http://www.linaro.org/linaro-blog/> Blog
>> >
>
> --
>
>  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-06-15 15:55 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-05  9:07 [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Tao Wang
2017-06-05  9:07 ` [PATCH RFC 2/2] thermal/cpu idle cooling: cpu idle cooling cooperate with cpu cooling Tao Wang
2017-06-06  3:41 ` [PATCH RFC 1/2] thermal/cpu idle cooling: Introduce cpu idle cooling driver Viresh Kumar
2017-06-07 21:50   ` Daniel Lezcano
2017-06-07 21:59     ` Rafael J. Wysocki
2017-06-08  7:52       ` Daniel Lezcano
2017-06-08 12:04   ` Daniel Lezcano
2017-06-09  8:20   ` Daniel Lezcano
     [not found]     ` <CAMBp1jM4KV5yj2=p=JBSfnw9u1D6qqtru49Rqp_8d3ePkHcE5Q@mail.gmail.com>
2017-06-14 12:55       ` Daniel Lezcano
2017-06-14 15:39         ` Vincent Guittot
2017-06-15 15:55         ` Jean Wangtao
2017-06-08  7:19 ` Vincent Guittot
     [not found]   ` <CAMBp1jNWrosu4vaFwqy4cKs69r3-zX_b06-09eitCcfteHBG2w@mail.gmail.com>
2017-06-08 13:42     ` Vincent Guittot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).