* [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
@ 2019-12-03 17:32 Sumit Gupta
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver Sumit Gupta
                   ` (2 more replies)
  0 siblings, 3 replies; 40+ messages in thread
From: Sumit Gupta @ 2019-12-03 17:32 UTC (permalink / raw)
  To: rjw, viresh.kumar, catalin.marinas, will, thierry.reding,
	jonathanh, talho, linux-pm, linux-tegra, linux-arm-kernel,
	linux-kernel
  Cc: bbasu, sumitg, mperttunen

Add a new function, of_tegra_bpmp_get(), to get BPMP data.
This function can be used by other drivers, like cpufreq, to
get BPMP data without adding any property to the respective
driver's DT node.

Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
---
 drivers/firmware/tegra/bpmp.c | 38 ++++++++++++++++++++++++++++++++++++++
 include/soc/tegra/bpmp.h      |  5 +++++
 2 files changed, 43 insertions(+)

diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
index 6741fcd..9c3d7f1 100644
--- a/drivers/firmware/tegra/bpmp.c
+++ b/drivers/firmware/tegra/bpmp.c
@@ -38,6 +38,44 @@ channel_to_ops(struct tegra_bpmp_channel *channel)
 	return bpmp->soc->ops;
 }
 
+struct tegra_bpmp *of_tegra_bpmp_get(void)
+{
+	struct platform_device *pdev;
+	struct device_node *bpmp_dev;
+	struct tegra_bpmp *bpmp;
+
+	/* Check for bpmp device status in DT */
+	bpmp_dev = of_find_compatible_node(NULL, NULL, "nvidia,tegra186-bpmp");
+	if (!bpmp_dev) {
+		bpmp = ERR_PTR(-ENODEV);
+		goto err_out;
+	}
+	if (!of_device_is_available(bpmp_dev)) {
+		bpmp = ERR_PTR(-ENODEV);
+		goto err_put;
+	}
+
+	pdev = of_find_device_by_node(bpmp_dev);
+	if (!pdev) {
+		bpmp = ERR_PTR(-ENODEV);
+		goto err_put;
+	}
+
+	bpmp = platform_get_drvdata(pdev);
+	if (!bpmp) {
+		bpmp = ERR_PTR(-EPROBE_DEFER);
+		put_device(&pdev->dev);
+		goto err_put;
+	}
+
+	return bpmp;
+err_put:
+	of_node_put(bpmp_dev);
+err_out:
+	return bpmp;
+}
+EXPORT_SYMBOL_GPL(of_tegra_bpmp_get);
+
 struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
 {
 	struct platform_device *pdev;
diff --git a/include/soc/tegra/bpmp.h b/include/soc/tegra/bpmp.h
index f2604e9..21402d9 100644
--- a/include/soc/tegra/bpmp.h
+++ b/include/soc/tegra/bpmp.h
@@ -107,6 +107,7 @@ struct tegra_bpmp_message {
 };
 
 #if IS_ENABLED(CONFIG_TEGRA_BPMP)
+struct tegra_bpmp *of_tegra_bpmp_get(void);
 struct tegra_bpmp *tegra_bpmp_get(struct device *dev);
 void tegra_bpmp_put(struct tegra_bpmp *bpmp);
 int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
@@ -122,6 +123,10 @@ void tegra_bpmp_free_mrq(struct tegra_bpmp *bpmp, unsigned int mrq,
 			 void *data);
 bool tegra_bpmp_mrq_is_supported(struct tegra_bpmp *bpmp, unsigned int mrq);
 #else
+static inline struct tegra_bpmp *of_tegra_bpmp_get(void)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
 static inline struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
 {
 	return ERR_PTR(-ENOTSUPP);
-- 
2.7.4



* [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-03 17:32 [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data Sumit Gupta
@ 2019-12-03 17:32 ` Sumit Gupta
  2019-12-04  5:40   ` Viresh Kumar
                     ` (3 more replies)
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 3/3] arm64: defconfig: Enable CONFIG_ARM_TEGRA194_CPUFREQ Sumit Gupta
  2019-12-03 17:42 ` [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data Thierry Reding
  2 siblings, 4 replies; 40+ messages in thread
From: Sumit Gupta @ 2019-12-03 17:32 UTC (permalink / raw)
  To: rjw, viresh.kumar, catalin.marinas, will, thierry.reding,
	jonathanh, talho, linux-pm, linux-tegra, linux-arm-kernel,
	linux-kernel
  Cc: bbasu, sumitg, mperttunen

Add support for CPU frequency scaling on Tegra194. The frequency
of each core can be adjusted by writing a clock divisor value to
an MSR on the core. The range of valid divisors is queried from
the BPMP.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
---
 drivers/cpufreq/Kconfig.arm        |   6 +
 drivers/cpufreq/Makefile           |   1 +
 drivers/cpufreq/tegra194-cpufreq.c | 423 +++++++++++++++++++++++++++++++++++++
 3 files changed, 430 insertions(+)
 create mode 100644 drivers/cpufreq/tegra194-cpufreq.c

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index a905796..4bcd47c 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -320,6 +320,12 @@ config ARM_TEGRA186_CPUFREQ
 	help
 	  This adds the CPUFreq driver support for Tegra186 SOCs.
 
+config ARM_TEGRA194_CPUFREQ
+	tristate "Tegra194 CPUFreq support"
+	depends on ARCH_TEGRA && TEGRA_BPMP
+	help
+	  This adds CPU frequency driver support for Tegra194 SOCs.
+
 config ARM_TI_CPUFREQ
 	bool "Texas Instruments CPUFreq support"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9a9f5cc..433d492 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_ARM_TANGO_CPUFREQ)		+= tango-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA186_CPUFREQ)	+= tegra186-cpufreq.o
+obj-$(CONFIG_ARM_TEGRA194_CPUFREQ)	+= tegra194-cpufreq.o
 obj-$(CONFIG_ARM_TI_CPUFREQ)		+= ti-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 
diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
new file mode 100644
index 0000000..9df12f4
--- /dev/null
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/smp_plat.h>
+
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+
+#define KHZ                     1000
+#define REF_CLK_MHZ             408 /* 408 MHz */
+#define US_DELAY                2000
+#define US_DELAY_MIN            2
+#define CPUFREQ_TBL_STEP_HZ     (50 * KHZ * KHZ)
+#define MAX_CNT                 ~0U
+
+/* cpufreq transition latency */
+#define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */
+
+enum cluster {
+	CLUSTER0,
+	CLUSTER1,
+	CLUSTER2,
+	CLUSTER3,
+	MAX_CLUSTERS,
+};
+
+struct tegra194_cpufreq_data {
+	void __iomem *regs;
+	size_t num_clusters;
+	struct cpufreq_frequency_table **tables;
+};
+
+static DEFINE_MUTEX(cpufreq_lock);
+
+struct tegra_cpu_ctr {
+	u32 cpu;
+	u32 delay;
+	u32 coreclk_cnt, last_coreclk_cnt;
+	u32 refclk_cnt, last_refclk_cnt;
+};
+
+static struct workqueue_struct *read_counters_wq;
+struct read_counters_work {
+	struct work_struct work;
+	struct tegra_cpu_ctr c;
+};
+
+static enum cluster get_cpu_cluster(u8 cpu)
+{
+	return MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+}
+
+/*
+ * Read per-core Read-only system register NVFREQ_FEEDBACK_EL1.
+ * The register provides frequency feedback information to
+ * determine the average actual frequency a core has run at over
+ * a period of time.
+ *	[31:0] PLLP counter: Counts at fixed frequency (408 MHz)
+ *	[63:32] Core clock counter: counts on every core clock cycle
+ *			where the core is architecturally clocking
+ */
+static u64 read_freq_feedback(void)
+{
+	u64 val = 0;
+
+	asm volatile("mrs %0, s3_0_c15_c0_5" : "=r" (val) : );
+
+	return val;
+}
+
+u16 map_freq_to_ndiv(struct mrq_cpu_ndiv_limits_response *nltbl, u32 freq)
+{
+	return DIV_ROUND_UP(freq * nltbl->pdiv * nltbl->mdiv,
+			    nltbl->ref_clk_hz / KHZ);
+}
+
+static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response
+				   *nltbl, u16 ndiv)
+{
+	return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv);
+}
+
+static void tegra_read_counters(struct work_struct *work)
+{
+	struct read_counters_work *read_counters_work;
+	struct tegra_cpu_ctr *c;
+	u64 val;
+
+	/*
+	 * ref_clk_counter(32 bit counter) runs on constant clk,
+	 * pll_p(408MHz).
+	 * It will take = 2 ^ 32 / 408 MHz to overflow ref clk counter
+	 *              = 10526880 usec = 10.527 sec to overflow
+	 *
+	 * Likewise, core_clk_counter(32 bit counter) runs on the core clock.
+	 * It's synchronized to crab_clk (cpu_crab_clk) which runs at
+	 * freq of cluster. Assuming max cluster clock ~2000MHz,
+	 * It will take = 2 ^ 32 / 2000 MHz to overflow core clk counter
+	 *              = ~2.147 sec to overflow
+	 */
+	read_counters_work = container_of(work, struct read_counters_work,
+					  work);
+	c = &read_counters_work->c;
+
+	val = read_freq_feedback();
+	c->last_refclk_cnt = lower_32_bits(val);
+	c->last_coreclk_cnt = upper_32_bits(val);
+	udelay(c->delay);
+	val = read_freq_feedback();
+	c->refclk_cnt = lower_32_bits(val);
+	c->coreclk_cnt = upper_32_bits(val);
+}
+
+/*
+ * Return instantaneous cpu speed
+ * Instantaneous freq is calculated as follows:
+ * - Take a sample on every query of the freq:
+ *	- Read the core and ref clock counters
+ *	- Delay for X us
+ *	- Read both cycle counters again
+ *	- Calculate the freq from the delta of the two counter reads,
+ *	  divided by the delay time (or its ref_clk_counter equivalent)
+ *	- Return Kcycles/second, i.e. the freq in KHz
+ *
+ *	delta time period = x sec
+ *			  = delta ref_clk_counter / (408 * 10^6) sec
+ *	freq in Hz = cycles/sec
+ *		   = delta cycles / x sec
+ *		   = (delta cycles * 408 * 10^6) / delta ref_clk_counter
+ *	in KHz	   = (delta cycles * 408 * 10^3) / delta ref_clk_counter
+ *
+ * @cpu - logical cpu whose freq to be updated
+ * Returns freq in KHz on success, 0 if cpu is offline
+ */
+static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
+{
+	struct read_counters_work read_counters_work;
+	struct tegra_cpu_ctr c;
+	u32 delta_refcnt;
+	u32 delta_ccnt;
+	u32 rate_mhz;
+
+	read_counters_work.c.cpu = cpu;
+	read_counters_work.c.delay = delay;
+	INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
+	queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
+	flush_work(&read_counters_work.work);
+	c = read_counters_work.c;
+
+	if (c.coreclk_cnt < c.last_coreclk_cnt)
+		delta_ccnt = c.coreclk_cnt + (MAX_CNT - c.last_coreclk_cnt);
+	else
+		delta_ccnt = c.coreclk_cnt - c.last_coreclk_cnt;
+	if (!delta_ccnt)
+		return 0;
+
+	/* ref clock is 32 bits */
+	if (c.refclk_cnt < c.last_refclk_cnt)
+		delta_refcnt = c.refclk_cnt + (MAX_CNT - c.last_refclk_cnt);
+	else
+		delta_refcnt = c.refclk_cnt - c.last_refclk_cnt;
+	if (!delta_refcnt) {
+		pr_debug("cpufreq: %d is idle, delta_refcnt: 0\n", cpu);
+		return 0;
+	}
+	rate_mhz = ((unsigned long)(delta_ccnt * REF_CLK_MHZ)) / delta_refcnt;
+
+	return (rate_mhz * KHZ); /* in KHz */
+}
+
+static unsigned int tegra194_get_speed(u32 cpu)
+{
+	return tegra194_get_speed_common(cpu, US_DELAY);
+}
+
+static unsigned int tegra194_fast_get_speed(u32 cpu)
+{
+	return tegra194_get_speed_common(cpu, US_DELAY_MIN);
+}
+
+static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
+{
+	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
+	int cluster = get_cpu_cluster(policy->cpu);
+
+	if (cluster >= data->num_clusters)
+		return -EINVAL;
+
+	policy->cur = tegra194_fast_get_speed(policy->cpu); /* boot freq */
+
+	/* set same policy for all cpus */
+	cpumask_copy(policy->cpus, cpu_possible_mask);
+
+	policy->freq_table = data->tables[cluster];
+	policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY;
+
+	return 0;
+}
+
+static void set_cpu_ndiv(void *data)
+{
+	struct cpufreq_frequency_table *tbl = data;
+	u64 ndiv_val = (u64)tbl->driver_data;
+
+	asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv_val));
+}
+
+static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
+				       unsigned int index)
+{
+	struct cpufreq_frequency_table *tbl = policy->freq_table + index;
+	static struct cpufreq_freqs freqs;
+
+	mutex_lock(&cpufreq_lock);
+	freqs.old = policy->cur;
+	freqs.new = tbl->frequency;
+
+	cpufreq_freq_transition_begin(policy, &freqs);
+	on_each_cpu_mask(policy->cpus, set_cpu_ndiv, tbl, true);
+	cpufreq_freq_transition_end(policy, &freqs, 0);
+
+	mutex_unlock(&cpufreq_lock);
+
+	return 0;
+}
+
+static struct cpufreq_driver tegra194_cpufreq_driver = {
+	.name = "tegra194",
+	.flags = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS |
+		CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_ASYNC_NOTIFICATION,
+	.verify = cpufreq_generic_frequency_table_verify,
+	.target_index = tegra194_cpufreq_set_target,
+	.get = tegra194_get_speed,
+	.init = tegra194_cpufreq_init,
+	.attr = cpufreq_generic_attr,
+};
+
+static void tegra194_cpufreq_free_resources(void)
+{
+	flush_workqueue(read_counters_wq);
+	destroy_workqueue(read_counters_wq);
+}
+
+static struct cpufreq_frequency_table *init_freq_table
+		(struct platform_device *pdev, struct tegra_bpmp *bpmp,
+		 unsigned int cluster_id)
+{
+	struct cpufreq_frequency_table *opp_table;
+	struct mrq_cpu_ndiv_limits_response resp;
+	unsigned int num_freqs, ndiv, delta_ndiv;
+	struct mrq_cpu_ndiv_limits_request req;
+	struct tegra_bpmp_message msg;
+	u16 freq_table_step_size;
+	int err, index;
+
+	memset(&req, 0, sizeof(req));
+	req.cluster_id = cluster_id;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.mrq = MRQ_CPU_NDIV_LIMITS;
+	msg.tx.data = &req;
+	msg.tx.size = sizeof(req);
+	msg.rx.data = &resp;
+	msg.rx.size = sizeof(resp);
+
+	err = tegra_bpmp_transfer(bpmp, &msg);
+	if (err)
+		return ERR_PTR(err);
+
+	/*
+	 * Make sure frequency table step is a multiple of mdiv to match
+	 * vhint table granularity.
+	 */
+	freq_table_step_size = resp.mdiv *
+			DIV_ROUND_UP(CPUFREQ_TBL_STEP_HZ, resp.ref_clk_hz);
+
+	dev_dbg(&pdev->dev, "cluster %d: frequency table step size: %d\n",
+		cluster_id, freq_table_step_size);
+
+	delta_ndiv = resp.ndiv_max - resp.ndiv_min;
+
+	if (unlikely(delta_ndiv == 0))
+		num_freqs = 1;
+	else
+		/* We store both ndiv_min and ndiv_max hence the +1 */
+		num_freqs = delta_ndiv / freq_table_step_size + 1;
+
+	num_freqs += (delta_ndiv % freq_table_step_size) ? 1 : 0;
+
+	opp_table = devm_kcalloc(&pdev->dev, num_freqs + 1, sizeof(*opp_table),
+				 GFP_KERNEL);
+	if (!opp_table)
+		return ERR_PTR(-ENOMEM);
+
+	for (index = 0, ndiv = resp.ndiv_min;
+			ndiv < resp.ndiv_max;
+			index++, ndiv += freq_table_step_size) {
+		opp_table[index].driver_data = ndiv;
+		opp_table[index].frequency = map_ndiv_to_freq(&resp, ndiv);
+	}
+
+	opp_table[index].driver_data = resp.ndiv_max;
+	opp_table[index++].frequency = map_ndiv_to_freq(&resp, resp.ndiv_max);
+	opp_table[index].frequency = CPUFREQ_TABLE_END;
+
+	return opp_table;
+}
+
+static int tegra194_cpufreq_probe(struct platform_device *pdev)
+{
+	struct tegra194_cpufreq_data *data;
+	struct tegra_bpmp *bpmp;
+	int err, i;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->num_clusters = MAX_CLUSTERS;
+	data->tables = devm_kcalloc(&pdev->dev, data->num_clusters,
+				    sizeof(*data->tables), GFP_KERNEL);
+	if (!data->tables)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, data);
+
+	read_counters_wq = alloc_workqueue("read_counters_wq", __WQ_LEGACY, 1);
+	if (!read_counters_wq) {
+		dev_err(&pdev->dev, "failed to create workqueue\n");
+		return -EINVAL;
+	}
+
+	bpmp = of_tegra_bpmp_get();
+	if (IS_ERR(bpmp)) {
+		err = PTR_ERR(bpmp);
+		goto err_free_res;
+	}
+
+	for (i = 0; i < data->num_clusters; i++) {
+		data->tables[i] = init_freq_table(pdev, bpmp, i);
+		if (IS_ERR(data->tables[i])) {
+			err = PTR_ERR(data->tables[i]);
+			goto put_bpmp;
+		}
+	}
+
+	tegra_bpmp_put(bpmp);
+
+	tegra194_cpufreq_driver.driver_data = data;
+
+	err = cpufreq_register_driver(&tegra194_cpufreq_driver);
+	if (err)
+		goto err_free_res;
+
+	return err;
+
+put_bpmp:
+	tegra_bpmp_put(bpmp);
+err_free_res:
+	tegra194_cpufreq_free_resources();
+	return err;
+}
+
+static int tegra194_cpufreq_remove(struct platform_device *pdev)
+{
+	cpufreq_unregister_driver(&tegra194_cpufreq_driver);
+	tegra194_cpufreq_free_resources();
+
+	return 0;
+}
+
+static struct platform_driver tegra194_cpufreq_platform_driver = {
+	.driver = {
+		.name = "tegra194-cpufreq",
+	},
+	.probe = tegra194_cpufreq_probe,
+	.remove = tegra194_cpufreq_remove,
+};
+
+static int __init tegra_cpufreq_init(void)
+{
+	struct platform_device *pdev;
+	int ret;
+
+	if (!of_machine_is_compatible("nvidia,tegra194"))
+		return -ENODEV;
+
+	ret = platform_driver_register(&tegra194_cpufreq_platform_driver);
+	if (ret)
+		return ret;
+
+	pdev = platform_device_register_simple("tegra194-cpufreq", -1, NULL, 0);
+	if (IS_ERR(pdev)) {
+		platform_driver_unregister(&tegra194_cpufreq_platform_driver);
+		return PTR_ERR(pdev);
+	}
+
+	return 0;
+}
+module_init(tegra_cpufreq_init);
+
+static void __exit tegra_cpufreq_exit(void)
+{
+	platform_driver_unregister(&tegra194_cpufreq_platform_driver);
+}
+module_exit(tegra_cpufreq_exit);
+
+MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
+MODULE_AUTHOR("Sumit Gupta <sumitg@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra194 cpufreq driver");
+MODULE_LICENSE("GPL v2");
-- 
2.7.4



* [TEGRA194_CPUFREQ Patch 3/3] arm64: defconfig: Enable CONFIG_ARM_TEGRA194_CPUFREQ
  2019-12-03 17:32 [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data Sumit Gupta
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver Sumit Gupta
@ 2019-12-03 17:32 ` Sumit Gupta
  2019-12-03 17:42 ` [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data Thierry Reding
  2 siblings, 0 replies; 40+ messages in thread
From: Sumit Gupta @ 2019-12-03 17:32 UTC (permalink / raw)
  To: rjw, viresh.kumar, catalin.marinas, will, thierry.reding,
	jonathanh, talho, linux-pm, linux-tegra, linux-arm-kernel,
	linux-kernel
  Cc: bbasu, sumitg, mperttunen

Enable Tegra194 CPU frequency scaling support by default.

Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 995a15f..c8e818b 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -87,6 +87,7 @@ CONFIG_ARM_SCPI_CPUFREQ=y
 CONFIG_ARM_IMX_CPUFREQ_DT=m
 CONFIG_ARM_RASPBERRYPI_CPUFREQ=m
 CONFIG_ARM_TEGRA186_CPUFREQ=y
+CONFIG_ARM_TEGRA194_CPUFREQ=y
 CONFIG_ARM_SCPI_PROTOCOL=y
 CONFIG_RASPBERRYPI_FIRMWARE=y
 CONFIG_INTEL_STRATIX10_SERVICE=y
-- 
2.7.4



* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2019-12-03 17:32 [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data Sumit Gupta
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver Sumit Gupta
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 3/3] arm64: defconfig: Enable CONFIG_ARM_TEGRA194_CPUFREQ Sumit Gupta
@ 2019-12-03 17:42 ` Thierry Reding
  2019-12-04  8:45   ` Mikko Perttunen
  2 siblings, 1 reply; 40+ messages in thread
From: Thierry Reding @ 2019-12-03 17:42 UTC (permalink / raw)
  To: Sumit Gupta
  Cc: rjw, viresh.kumar, catalin.marinas, will, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On Tue, Dec 03, 2019 at 11:02:26PM +0530, Sumit Gupta wrote:
> Add a new function, of_tegra_bpmp_get(), to get BPMP data.
> This function can be used by other drivers, like cpufreq, to
> get BPMP data without adding any property to the respective
> driver's DT node.

What's wrong with adding the property in the DT node? We already do that
for Tegra186's CPU frequency driver, so it makes sense to continue that
for Tegra194.

Thierry

> Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
> ---
>  drivers/firmware/tegra/bpmp.c | 38 ++++++++++++++++++++++++++++++++++++++
>  include/soc/tegra/bpmp.h      |  5 +++++
>  2 files changed, 43 insertions(+)
> 
> diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
> index 6741fcd..9c3d7f1 100644
> --- a/drivers/firmware/tegra/bpmp.c
> +++ b/drivers/firmware/tegra/bpmp.c
> @@ -38,6 +38,44 @@ channel_to_ops(struct tegra_bpmp_channel *channel)
>  	return bpmp->soc->ops;
>  }
>  
> +struct tegra_bpmp *of_tegra_bpmp_get(void)
> +{
> +	struct platform_device *pdev;
> +	struct device_node *bpmp_dev;
> +	struct tegra_bpmp *bpmp;
> +
> +	/* Check for bpmp device status in DT */
> +	bpmp_dev = of_find_compatible_node(NULL, NULL, "nvidia,tegra186-bpmp");
> +	if (!bpmp_dev) {
> +		bpmp = ERR_PTR(-ENODEV);
> +		goto err_out;
> +	}
> +	if (!of_device_is_available(bpmp_dev)) {
> +		bpmp = ERR_PTR(-ENODEV);
> +		goto err_put;
> +	}
> +
> +	pdev = of_find_device_by_node(bpmp_dev);
> +	if (!pdev) {
> +		bpmp = ERR_PTR(-ENODEV);
> +		goto err_put;
> +	}
> +
> +	bpmp = platform_get_drvdata(pdev);
> +	if (!bpmp) {
> +		bpmp = ERR_PTR(-EPROBE_DEFER);
> +		put_device(&pdev->dev);
> +		goto err_put;
> +	}
> +
> +	return bpmp;
> +err_put:
> +	of_node_put(bpmp_dev);
> +err_out:
> +	return bpmp;
> +}
> +EXPORT_SYMBOL_GPL(of_tegra_bpmp_get);
> +
>  struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
>  {
>  	struct platform_device *pdev;
> diff --git a/include/soc/tegra/bpmp.h b/include/soc/tegra/bpmp.h
> index f2604e9..21402d9 100644
> --- a/include/soc/tegra/bpmp.h
> +++ b/include/soc/tegra/bpmp.h
> @@ -107,6 +107,7 @@ struct tegra_bpmp_message {
>  };
>  
>  #if IS_ENABLED(CONFIG_TEGRA_BPMP)
> +struct tegra_bpmp *of_tegra_bpmp_get(void);
>  struct tegra_bpmp *tegra_bpmp_get(struct device *dev);
>  void tegra_bpmp_put(struct tegra_bpmp *bpmp);
>  int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
> @@ -122,6 +123,10 @@ void tegra_bpmp_free_mrq(struct tegra_bpmp *bpmp, unsigned int mrq,
>  			 void *data);
>  bool tegra_bpmp_mrq_is_supported(struct tegra_bpmp *bpmp, unsigned int mrq);
>  #else
> +static inline struct tegra_bpmp *of_tegra_bpmp_get(void)
> +{
> +	return ERR_PTR(-ENOTSUPP);
> +}
>  static inline struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
>  {
>  	return ERR_PTR(-ENOTSUPP);
> -- 
> 2.7.4
> 



* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver Sumit Gupta
@ 2019-12-04  5:40   ` Viresh Kumar
  2019-12-04 10:55     ` sumitg
  2019-12-04 13:59   ` Dmitry Osipenko
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2019-12-04  5:40 UTC (permalink / raw)
  To: Sumit Gupta
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

Hi Sumit,

On 03-12-19, 23:02, Sumit Gupta wrote:
> Add support for CPU frequency scaling on Tegra194. The frequency
> of each core can be adjusted by writing a clock divisor value to
> an MSR on the core. The range of valid divisors is queried from
> the BPMP.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
> ---
>  drivers/cpufreq/Kconfig.arm        |   6 +
>  drivers/cpufreq/Makefile           |   1 +
>  drivers/cpufreq/tegra194-cpufreq.c | 423 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 430 insertions(+)
>  create mode 100644 drivers/cpufreq/tegra194-cpufreq.c

Overall these are the things that you are doing here in the driver:

- open coded clk_{get|set}_rate(), why can't you implement a clock
  driver for the CPU and use the clk framework? You may not need the
  (hacky) work-queue usage then probably.

- populating cpufreq table, you can probably add OPPs instead using
  the same mechanism

- And then you can reuse the cpufreq-dt driver for your platform as
  well, as is the case for few other tegra platforms.

-- 
viresh


* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2019-12-03 17:42 ` [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data Thierry Reding
@ 2019-12-04  8:45   ` Mikko Perttunen
  2019-12-04  9:17     ` Viresh Kumar
  0 siblings, 1 reply; 40+ messages in thread
From: Mikko Perttunen @ 2019-12-04  8:45 UTC (permalink / raw)
  To: Thierry Reding, Sumit Gupta
  Cc: rjw, viresh.kumar, catalin.marinas, will, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

The difference here is that whereas on Tegra186 the frequency is managed
through a specific memory-mapped device, on Tegra194 the frequency is
managed through a CPU MSR, leaving no "specific" node for this property
apart from the CPU nodes themselves.

Now, my original patchset (which this series is based on) did add
nvidia,bpmp properties on the CPU DT nodes themselves and queried the
BPMP based on that. A change is still required for that, since
tegra_bpmp_get() currently takes a 'struct device *', which we don't
have for a CPU DT node.

Mikko

On 3.12.2019 19.42, Thierry Reding wrote:
> On Tue, Dec 03, 2019 at 11:02:26PM +0530, Sumit Gupta wrote:
>> Add a new function, of_tegra_bpmp_get(), to get BPMP data.
>> This function can be used by other drivers, like cpufreq, to
>> get BPMP data without adding any property to the respective
>> driver's DT node.
> 
> What's wrong with adding the property in the DT node? We already do that
> for Tegra186's CPU frequency driver, so it makes sense to continue that
> for Tegra194.
> 
> Thierry
> 
>> Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
>> ---
>>   drivers/firmware/tegra/bpmp.c | 38 ++++++++++++++++++++++++++++++++++++++
>>   include/soc/tegra/bpmp.h      |  5 +++++
>>   2 files changed, 43 insertions(+)
>>
>> diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
>> index 6741fcd..9c3d7f1 100644
>> --- a/drivers/firmware/tegra/bpmp.c
>> +++ b/drivers/firmware/tegra/bpmp.c
>> @@ -38,6 +38,44 @@ channel_to_ops(struct tegra_bpmp_channel *channel)
>>   	return bpmp->soc->ops;
>>   }
>>   
>> +struct tegra_bpmp *of_tegra_bpmp_get(void)
>> +{
>> +	struct platform_device *pdev;
>> +	struct device_node *bpmp_dev;
>> +	struct tegra_bpmp *bpmp;
>> +
>> +	/* Check for bpmp device status in DT */
>> +	bpmp_dev = of_find_compatible_node(NULL, NULL, "nvidia,tegra186-bpmp");
>> +	if (!bpmp_dev) {
>> +		bpmp = ERR_PTR(-ENODEV);
>> +		goto err_out;
>> +	}
>> +	if (!of_device_is_available(bpmp_dev)) {
>> +		bpmp = ERR_PTR(-ENODEV);
>> +		goto err_put;
>> +	}
>> +
>> +	pdev = of_find_device_by_node(bpmp_dev);
>> +	if (!pdev) {
>> +		bpmp = ERR_PTR(-ENODEV);
>> +		goto err_put;
>> +	}
>> +
>> +	bpmp = platform_get_drvdata(pdev);
>> +	if (!bpmp) {
>> +		bpmp = ERR_PTR(-EPROBE_DEFER);
>> +		put_device(&pdev->dev);
>> +		goto err_put;
>> +	}
>> +
>> +	return bpmp;
>> +err_put:
>> +	of_node_put(bpmp_dev);
>> +err_out:
>> +	return bpmp;
>> +}
>> +EXPORT_SYMBOL_GPL(of_tegra_bpmp_get);
>> +
>>   struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
>>   {
>>   	struct platform_device *pdev;
>> diff --git a/include/soc/tegra/bpmp.h b/include/soc/tegra/bpmp.h
>> index f2604e9..21402d9 100644
>> --- a/include/soc/tegra/bpmp.h
>> +++ b/include/soc/tegra/bpmp.h
>> @@ -107,6 +107,7 @@ struct tegra_bpmp_message {
>>   };
>>   
>>   #if IS_ENABLED(CONFIG_TEGRA_BPMP)
>> +struct tegra_bpmp *of_tegra_bpmp_get(void);
>>   struct tegra_bpmp *tegra_bpmp_get(struct device *dev);
>>   void tegra_bpmp_put(struct tegra_bpmp *bpmp);
>>   int tegra_bpmp_transfer_atomic(struct tegra_bpmp *bpmp,
>> @@ -122,6 +123,10 @@ void tegra_bpmp_free_mrq(struct tegra_bpmp *bpmp, unsigned int mrq,
>>   			 void *data);
>>   bool tegra_bpmp_mrq_is_supported(struct tegra_bpmp *bpmp, unsigned int mrq);
>>   #else
>> +static inline struct tegra_bpmp *of_tegra_bpmp_get(void)
>> +{
>> +	return ERR_PTR(-ENOTSUPP);
>> +}
>>   static inline struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
>>   {
>>   	return ERR_PTR(-ENOTSUPP);
>> -- 
>> 2.7.4
>>


* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2019-12-04  8:45   ` Mikko Perttunen
@ 2019-12-04  9:17     ` Viresh Kumar
  2019-12-04  9:33       ` Thierry Reding
  2019-12-04 10:21       ` Mikko Perttunen
  0 siblings, 2 replies; 40+ messages in thread
From: Viresh Kumar @ 2019-12-04  9:17 UTC (permalink / raw)
  To: Mikko Perttunen
  Cc: Thierry Reding, Sumit Gupta, rjw, catalin.marinas, will,
	jonathanh, talho, linux-pm, linux-tegra, linux-arm-kernel,
	linux-kernel, bbasu, mperttunen

On 04-12-19, 10:45, Mikko Perttunen wrote:
> Now, my original patchset (which this series is based on) did add
> nvidia,bpmp properties on the CPU DT nodes themselves and queried the
> BPMP based on that. A change is still required for that, since
> tegra_bpmp_get() currently takes a 'struct device *', which we don't
> have for a CPU DT node.

I may be missing the context, but the CPUs always have a struct device
* for them, which we get via a call to get_cpu_device(cpu), isn't it?

-- 
viresh


* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2019-12-04  9:17     ` Viresh Kumar
@ 2019-12-04  9:33       ` Thierry Reding
  2019-12-04  9:51         ` Viresh Kumar
  2019-12-04 10:21       ` Mikko Perttunen
  1 sibling, 1 reply; 40+ messages in thread
From: Thierry Reding @ 2019-12-04  9:33 UTC (permalink / raw)
  To: Viresh Kumar, Rob Herring
  Cc: Mikko Perttunen, Sumit Gupta, rjw, catalin.marinas, will,
	jonathanh, talho, linux-pm, linux-tegra, linux-arm-kernel,
	linux-kernel, bbasu, mperttunen, devicetree

On Wed, Dec 04, 2019 at 02:47:03PM +0530, Viresh Kumar wrote:
> On 04-12-19, 10:45, Mikko Perttunen wrote:
> > Now, my original patchset (which this series is based on) did add
> > nvidia,bpmp properties on the CPU DT nodes themselves and queried the
> > BPMP based on that. A change is still required for that, since
> > tegra_bpmp_get() currently takes a 'struct device *', which we don't
> > have for a CPU DT node.
> 
> I may be missing the context, but the CPUs always have a struct device
> * for them, which we get via a call to get_cpu_device(cpu), isn't it?

Yeah, the code that registers this device is in drivers/base/cpu.c in
register_cpu(). It even retrieves the device tree node for the CPU from
device tree and stores it in cpu->dev.of_node, so we should be able to
just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
to the BPMP.
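
As a minimal sketch (illustrative only, not code from this series),
assuming the cpu@N nodes carry the nvidia,bpmp phandle from Mikko's
original series, the cpufreq ->init() path could then do:

static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
{
	struct device *cpu_dev = get_cpu_device(policy->cpu);
	struct tegra_bpmp *bpmp;

	if (!cpu_dev)
		return -ENODEV;

	/* resolves the "nvidia,bpmp" phandle in cpu_dev->of_node */
	bpmp = tegra_bpmp_get(cpu_dev);
	if (IS_ERR(bpmp))
		return PTR_ERR(bpmp);

	/* ... query MRQ_CPU_NDIV_LIMITS here, then drop the reference ... */
	tegra_bpmp_put(bpmp);

	return 0;
}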

That said, I'm wondering if perhaps we could just add a compatible
string to the /cpus node for cases like this where we don't have an
actual device representing the CPU complex. There are a number of CPU
frequency drivers that register dummy devices just so that they have
something to bind a driver to.

If we allow the /cpus node to represent the CPU complex (if no other
"device" does that yet), we can add a compatible string and have the
cpufreq driver match on that.

Of course this would be slightly difficult to retrofit into existing
drivers because they'd need to remain backwards compatible with existing
device trees. But it would allow future drivers to do this a little more
elegantly. For some SoCs this may not matter, but especially once you
start depending on additional resources this would come in handy.
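
A rough sketch of how a driver could match on such a node (the
compatible value here is invented purely for illustration):

static int tegra194_cpufreq_match(void)
{
	struct device_node *cpus = of_find_node_by_path("/cpus");
	bool match;

	if (!cpus)
		return -ENODEV;

	/* /cpus would represent the CPU complex and carry the compatible */
	match = of_device_is_compatible(cpus, "nvidia,tegra194-ccplex");
	of_node_put(cpus);

	return match ? 0 : -ENODEV;
}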

Adding Rob and the device tree mailing list for feedback on this idea.

Thierry



* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2019-12-04  9:33       ` Thierry Reding
@ 2019-12-04  9:51         ` Viresh Kumar
  2020-04-07 10:05           ` Thierry Reding
  0 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2019-12-04  9:51 UTC (permalink / raw)
  To: Thierry Reding
  Cc: Rob Herring, Mikko Perttunen, Sumit Gupta, rjw, catalin.marinas,
	will, jonathanh, talho, linux-pm, linux-tegra, linux-arm-kernel,
	linux-kernel, bbasu, mperttunen, devicetree

On 04-12-19, 10:33, Thierry Reding wrote:
> Yeah, the code that registers this device is in drivers/base/cpu.c in
> register_cpu(). It even retrieves the device tree node for the CPU from
> device tree and stores it in cpu->dev.of_node, so we should be able to
> just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
> to the BPMP.
> 
> That said, I'm wondering if perhaps we could just add a compatible
> string to the /cpus node for cases like this where we don't have an
> actual device representing the CPU complex. There are a number of CPU
> frequency drivers that register dummy devices just so that they have
> something to bind a driver to.
> 
> If we allow the /cpus node to represent the CPU complex (if no other
> "device" does that yet), we can add a compatible string and have the
> cpufreq driver match on that.
> 
> Of course this would be slightly difficult to retrofit into existing
> drivers because they'd need to remain backwards compatible with existing
> device trees. But it would allow future drivers to do this a little more
> elegantly. For some SoCs this may not matter, but especially once you
> start depending on additional resources this would come in handy.
> 
> Adding Rob and the device tree mailing list for feedback on this idea.

Took some time to find this thread, but something around this was
suggested by Rafael earlier.

https://lore.kernel.org/lkml/8139001.Q4eV8YG1Il@vostro.rjw.lan/

-- 
viresh


* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2019-12-04  9:17     ` Viresh Kumar
  2019-12-04  9:33       ` Thierry Reding
@ 2019-12-04 10:21       ` Mikko Perttunen
  2019-12-04 10:26         ` Viresh Kumar
  1 sibling, 1 reply; 40+ messages in thread
From: Mikko Perttunen @ 2019-12-04 10:21 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Thierry Reding, Sumit Gupta, rjw, catalin.marinas, will,
	jonathanh, talho, linux-pm, linux-tegra, linux-arm-kernel,
	linux-kernel, bbasu, mperttunen

Ah, it seems I was mistaken here. Thanks for the information.

Thanks,
Mikko

On 4.12.2019 11.17, Viresh Kumar wrote:
> On 04-12-19, 10:45, Mikko Perttunen wrote:
>> Now, my original patchset (which this series is based on) did add
>> nvidia,bpmp properties on the CPU DT nodes themselves and queried the
>> BPMP based on that. A change is still required for that, since
>> tegra_bpmp_get() currently takes a 'struct device *', which we don't
>> have for a CPU DT node.
> 
> I may be missing the context, but the CPUs always have a struct device
> * for them, which we get via a call to get_cpu_device(cpu), isn't it?
> 


* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2019-12-04 10:21       ` Mikko Perttunen
@ 2019-12-04 10:26         ` Viresh Kumar
  0 siblings, 0 replies; 40+ messages in thread
From: Viresh Kumar @ 2019-12-04 10:26 UTC (permalink / raw)
  To: Mikko Perttunen
  Cc: Thierry Reding, Sumit Gupta, rjw, catalin.marinas, will,
	jonathanh, talho, linux-pm, linux-tegra, linux-arm-kernel,
	linux-kernel, bbasu, mperttunen

On 04-12-19, 12:21, Mikko Perttunen wrote:
> Ah, it seems I was mistaken here. Thanks for the information.

Please avoid top-posting [1][2] for LKML discussions.

Thanks.

-- 
viresh

[1] https://en.wikipedia.org/wiki/Posting_style#Top-posting
[2] https://web.archive.org/web/20080722025748/http://www.zip.com.au/~akpm/linux/patches/stuff/top-posting.txt


* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-04  5:40   ` Viresh Kumar
@ 2019-12-04 10:55     ` sumitg
  2019-12-04 11:27       ` Viresh Kumar
  0 siblings, 1 reply; 40+ messages in thread
From: sumitg @ 2019-12-04 10:55 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

Hi Viresh,

On 04/12/19 11:10 AM, Viresh Kumar wrote:
> Hi Sumit,
>
> On 03-12-19, 23:02, Sumit Gupta wrote:
>> Add support for CPU frequency scaling on Tegra194. The frequency
>> of each core can be adjusted by writing a clock divisor value to
>> an MSR on the core. The range of valid divisors is queried from
>> the BPMP.
>>
>> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
>> Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
>> ---
>>   drivers/cpufreq/Kconfig.arm        |   6 +
>>   drivers/cpufreq/Makefile           |   1 +
>>   drivers/cpufreq/tegra194-cpufreq.c | 423 +++++++++++++++++++++++++++++++++++++
>>   3 files changed, 430 insertions(+)
>>   create mode 100644 drivers/cpufreq/tegra194-cpufreq.c
> Overall these are the things that you are doing here in the driver:
>
> - open coded clk_{get|set}_rate(), why can't you implement a clock
>    driver for the CPU and use the clk framework? You may not need the
>    (hacky) work-queue usage then probably.

In T194, the CCPLEX doesn't have access to set clocks; the
clk_{get|set}_rate() functions set clocks by hooking into the BPMP R5.
The CPU frequency can be set directly by the CCPLEX using an MSR
(NVFREQ_REQ_EL1). As DVFS runs on the BPMP, another MSR
(NVFREQ_FEEDBACK_EL1) is used to read the counters and calculate the
"actual" CPU frequency on the CCPLEX. So, the "cpuinfo_cur_freq" node
gives the actual CPU frequency, which is not what "scaling_cur_freq"
reports.
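
To put rough numbers on that feedback math (values assumed for
illustration, not measured on hardware): with the 2000 us sampling
delay used by the driver,

	delta_refcnt ~ 408 MHz * 2000 us = 816,000 ticks
	delta_ccnt   = 4,000,000 ticks (a core running near 2 GHz)
	freq         = delta_ccnt * 408 / delta_refcnt
	             = 2000 MHz, reported as 2,000,000 KHz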

>
> - populating cpufreq table, you can probably add OPPs instead using
>    the same mechanism

We are reading the available frequencies from the BPMP to populate the
cpufreq table, and are not using a static OPP table.

>
> - And then you can reuse the cpufreq-dt driver for your platform as
>    well, as is the case for few other tegra platforms.
>


* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-04 10:55     ` sumitg
@ 2019-12-04 11:27       ` Viresh Kumar
  2019-12-04 13:57         ` Dmitry Osipenko
  2020-03-25 23:59         ` sumitg
  0 siblings, 2 replies; 40+ messages in thread
From: Viresh Kumar @ 2019-12-04 11:27 UTC (permalink / raw)
  To: sumitg
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On 04-12-19, 16:25, sumitg wrote:
> In T194, the CCPLEX doesn't have access to set clocks; the
> clk_{get|set}_rate() functions set clocks by hooking into the BPMP R5.
> The CPU frequency can be set directly by the CCPLEX using an MSR
> (NVFREQ_REQ_EL1). As DVFS runs on the BPMP, another MSR
> (NVFREQ_FEEDBACK_EL1) is used to read the counters and calculate the
> "actual" CPU frequency on the CCPLEX. So, the "cpuinfo_cur_freq" node
> gives the actual CPU frequency, which is not what "scaling_cur_freq"
> reports.

Right, but why can't this be hidden in the CPU's clk driver instead,
so cpufreq driver can just do clk_get_rate() and clk_set_rate() ?
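
A hypothetical sketch of such a wrapper (all names invented for
illustration; note the NVFREQ_REQ_EL1 write has to execute on the core
being scaled, which is where the complexity would move to):

#include <linux/clk-provider.h>
#include <linux/smp.h>

struct tegra194_cpu_clk {
	struct clk_hw hw;
	unsigned int cpu;		/* core whose MSR this clk drives */
	u64 ndiv;			/* last divisor requested */
	u32 ref_clk_hz, pdiv, mdiv;	/* from the BPMP ndiv-limits reply */
};

static void tegra194_cpu_clk_write_ndiv(void *data)
{
	struct tegra194_cpu_clk *clk = data;

	/* NVFREQ_REQ_EL1, as in set_cpu_ndiv() from the patch */
	asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (clk->ndiv));
}

static int tegra194_cpu_clk_set_rate(struct clk_hw *hw, unsigned long rate,
				     unsigned long parent_rate)
{
	struct tegra194_cpu_clk *clk =
		container_of(hw, struct tegra194_cpu_clk, hw);

	/* inverse of map_ndiv_to_freq() in the patch */
	clk->ndiv = DIV_ROUND_UP((u64)rate * clk->pdiv * clk->mdiv,
				 clk->ref_clk_hz);

	/* run the MSR write on the target core */
	return smp_call_function_single(clk->cpu,
					tegra194_cpu_clk_write_ndiv,
					clk, true);
}

static unsigned long tegra194_cpu_clk_recalc_rate(struct clk_hw *hw,
						  unsigned long parent_rate)
{
	struct tegra194_cpu_clk *clk =
		container_of(hw, struct tegra194_cpu_clk, hw);

	return (unsigned long)clk->ref_clk_hz /
	       (clk->pdiv * clk->mdiv) * clk->ndiv;
}

static const struct clk_ops tegra194_cpu_clk_ops = {
	.set_rate = tegra194_cpu_clk_set_rate,
	.recalc_rate = tegra194_cpu_clk_recalc_rate,
};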

> > 
> > - populating cpufreq table, you can probably add OPPs instead using
> >    the same mechanism
> 
> We are reading the available frequencies from the BPMP to populate the
> cpufreq table, and are not using a static OPP table.

Right, and a lot of other platforms read it from firmware (I believe
BPMP is a firmware here) and create OPPs at runtime. Look at this for
example:

drivers/cpufreq/qcom-cpufreq-hw.c

and search for dev_pm_opp_add().
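
A rough sketch of that pattern, reusing the MRQ_CPU_NDIV_LIMITS
response already queried in this series (function and parameter names
are made up; the voltage argument is left 0 since the ndiv-to-voltage
mapping lives in the BPMP):

#include <linux/pm_opp.h>

static int tegra194_add_opps(struct device *cpu_dev,
			     struct mrq_cpu_ndiv_limits_response *resp,
			     u16 step)
{
	u32 ndiv;
	int ret;

	for (ndiv = resp->ndiv_min; ndiv <= resp->ndiv_max; ndiv += step) {
		/* same math as map_ndiv_to_freq(), but in Hz */
		unsigned long freq_hz = (unsigned long)resp->ref_clk_hz /
					(resp->pdiv * resp->mdiv) * ndiv;

		ret = dev_pm_opp_add(cpu_dev, freq_hz, 0);
		if (ret)
			return ret;
	}

	return 0;
}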

-- 
viresh


* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-04 11:27       ` Viresh Kumar
@ 2019-12-04 13:57         ` Dmitry Osipenko
  2019-12-05  2:51           ` Viresh Kumar
  2020-03-25 23:59         ` sumitg
  1 sibling, 1 reply; 40+ messages in thread
From: Dmitry Osipenko @ 2019-12-04 13:57 UTC (permalink / raw)
  To: Viresh Kumar, sumitg
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

04.12.2019 14:27, Viresh Kumar wrote:
> On 04-12-19, 16:25, sumitg wrote:
>> In T194, the CCPLEX doesn't have access to set clocks; the
>> clk_{get|set}_rate() functions set clocks by hooking into the BPMP R5.
>> The CPU frequency can be set directly by the CCPLEX using an MSR
>> (NVFREQ_REQ_EL1). As DVFS runs on the BPMP, another MSR
>> (NVFREQ_FEEDBACK_EL1) is used to read the counters and calculate the
>> "actual" CPU frequency on the CCPLEX. So, the "cpuinfo_cur_freq" node
>> gives the actual CPU frequency, which is not what "scaling_cur_freq"
>> reports.
> 
> Right, but why can't this be hidden in the CPU's clk driver instead,
> so cpufreq driver can just do clk_get_rate() and clk_set_rate() ?

What about making use of dev_pm_opp_register_set_opp_helper()?


>>> - populating cpufreq table, you can probably add OPPs instead using
>>>    the same mechanism
>>
>> We are reading the available frequencies from the BPMP to populate the
>> cpufreq table, and are not using a static OPP table.
> 
> Right, and a lot of other platforms read it from firmware (I believe
> BPMP is a firmware here) and create OPPs at runtime. Look at this for
> example:
> 
> drivers/cpufreq/qcom-cpufreq-hw.c
> 
> and search for dev_pm_opp_add().
> 



* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver Sumit Gupta
  2019-12-04  5:40   ` Viresh Kumar
@ 2019-12-04 13:59   ` Dmitry Osipenko
  2019-12-05 14:15   ` Dmitry Osipenko
  2020-03-26 11:50   ` Viresh Kumar
  3 siblings, 0 replies; 40+ messages in thread
From: Dmitry Osipenko @ 2019-12-04 13:59 UTC (permalink / raw)
  To: Sumit Gupta, rjw, viresh.kumar, catalin.marinas, will,
	thierry.reding, jonathanh, talho, linux-pm, linux-tegra,
	linux-arm-kernel, linux-kernel
  Cc: bbasu, mperttunen

03.12.2019 20:32, Sumit Gupta wrote:
> Add support for CPU frequency scaling on Tegra194. The frequency
> of each core can be adjusted by writing a clock divisor value to
> an MSR on the core. The range of valid divisors is queried from
> the BPMP.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
> ---
>  drivers/cpufreq/Kconfig.arm        |   6 +
>  drivers/cpufreq/Makefile           |   1 +
>  drivers/cpufreq/tegra194-cpufreq.c | 423 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 430 insertions(+)
>  create mode 100644 drivers/cpufreq/tegra194-cpufreq.c
> 
> diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
> index a905796..4bcd47c 100644
> --- a/drivers/cpufreq/Kconfig.arm
> +++ b/drivers/cpufreq/Kconfig.arm
> @@ -320,6 +320,12 @@ config ARM_TEGRA186_CPUFREQ
>  	help
>  	  This adds the CPUFreq driver support for Tegra186 SOCs.
>  
> +config ARM_TEGRA194_CPUFREQ
> +	tristate "Tegra194 CPUFreq support"
> +	depends on ARCH_TEGRA && TEGRA_BPMP
> +	help
> +	  This adds CPU frequency driver support for Tegra194 SOCs.
> +
>  config ARM_TI_CPUFREQ
>  	bool "Texas Instruments CPUFreq support"
>  	depends on ARCH_OMAP2PLUS
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index 9a9f5cc..433d492 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -85,6 +85,7 @@ obj-$(CONFIG_ARM_TANGO_CPUFREQ)		+= tango-cpufreq.o
>  obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
>  obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
>  obj-$(CONFIG_ARM_TEGRA186_CPUFREQ)	+= tegra186-cpufreq.o
> +obj-$(CONFIG_ARM_TEGRA194_CPUFREQ)	+= tegra194-cpufreq.o
>  obj-$(CONFIG_ARM_TI_CPUFREQ)		+= ti-cpufreq.o
>  obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
>  
> diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
> new file mode 100644
> index 0000000..9df12f4
> --- /dev/null
> +++ b/drivers/cpufreq/tegra194-cpufreq.c
> @@ -0,0 +1,423 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/cpufreq.h>
> +#include <linux/delay.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/of_platform.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +
> +#include <asm/smp_plat.h>
> +
> +#include <soc/tegra/bpmp.h>
> +#include <soc/tegra/bpmp-abi.h>
> +
> +#define KHZ                     1000
> +#define REF_CLK_MHZ             408 /* 408 MHz */
> +#define US_DELAY                2000
> +#define US_DELAY_MIN            2
> +#define CPUFREQ_TBL_STEP_HZ     (50 * KHZ * KHZ)
> +#define MAX_CNT                 ~0U
> +
> +/* cpufreq transition latency */
> +#define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */
> +
> +enum cluster {
> +	CLUSTER0,
> +	CLUSTER1,
> +	CLUSTER2,
> +	CLUSTER3,
> +	MAX_CLUSTERS,
> +};
> +
> +struct tegra194_cpufreq_data {
> +	void __iomem *regs;
> +	size_t num_clusters;
> +	struct cpufreq_frequency_table **tables;
> +};
> +
> +static DEFINE_MUTEX(cpufreq_lock);
> +
> +struct tegra_cpu_ctr {
> +	u32 cpu;
> +	u32 delay;
> +	u32 coreclk_cnt, last_coreclk_cnt;
> +	u32 refclk_cnt, last_refclk_cnt;
> +};
> +
> +static struct workqueue_struct *read_counters_wq;
> +struct read_counters_work {
> +	struct work_struct work;
> +	struct tegra_cpu_ctr c;
> +};
> +
> +static enum cluster get_cpu_cluster(u8 cpu)
> +{
> +	return MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
> +}
> +
> +/*
> + * Read per-core Read-only system register NVFREQ_FEEDBACK_EL1.
> + * The register provides frequency feedback information to
> + * determine the average actual frequency a core has run at over
> + * a period of time.
> + *	[31:0] PLLP counter: Counts at fixed frequency (408 MHz)
> + *	[63:32] Core clock counter: counts on every core clock cycle
> + *			where the core is architecturally clocking
> + */
> +static u64 read_freq_feedback(void)
> +{
> +	u64 val = 0;
> +
> +	asm volatile("mrs %0, s3_0_c15_c0_5" : "=r" (val) : );
> +
> +	return val;
> +}
> +
> +u16 map_freq_to_ndiv(struct mrq_cpu_ndiv_limits_response *nltbl, u32 freq)
> +{
> +	return DIV_ROUND_UP(freq * nltbl->pdiv * nltbl->mdiv,
> +			    nltbl->ref_clk_hz / KHZ);
> +}

This function isn't used anywhere.


* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-04 13:57         ` Dmitry Osipenko
@ 2019-12-05  2:51           ` Viresh Kumar
  2019-12-05 12:55             ` Dmitry Osipenko
  0 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2019-12-05  2:51 UTC (permalink / raw)
  To: Dmitry Osipenko
  Cc: sumitg, rjw, catalin.marinas, will, thierry.reding, jonathanh,
	talho, linux-pm, linux-tegra, linux-arm-kernel, linux-kernel,
	bbasu, mperttunen

On 04-12-19, 16:57, Dmitry Osipenko wrote:
> 04.12.2019 14:27, Viresh Kumar wrote:
> > On 04-12-19, 16:25, sumitg wrote:
> >> In T194, the CCPLEX doesn't have access to set clocks; the
> >> clk_{get|set}_rate() functions set clocks by hooking into the BPMP R5.
> >> The CPU frequency can be set directly by the CCPLEX using an MSR
> >> (NVFREQ_REQ_EL1). As DVFS runs on the BPMP, another MSR
> >> (NVFREQ_FEEDBACK_EL1) is used to read the counters and calculate the
> >> "actual" CPU frequency on the CCPLEX. So, the "cpuinfo_cur_freq" node
> >> gives the actual CPU frequency, which is not what "scaling_cur_freq"
> >> reports.
> > 
> > Right, but why can't this be hidden in the CPU's clk driver instead,
> > so cpufreq driver can just do clk_get_rate() and clk_set_rate() ?
> 
> What about making use of dev_pm_opp_register_set_opp_helper()?

It has a different purpose where we have to play with different
regulators. And that won't help with clk_get_rate() anyway.

-- 
viresh


* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-05  2:51           ` Viresh Kumar
@ 2019-12-05 12:55             ` Dmitry Osipenko
  0 siblings, 0 replies; 40+ messages in thread
From: Dmitry Osipenko @ 2019-12-05 12:55 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: sumitg, rjw, catalin.marinas, will, thierry.reding, jonathanh,
	talho, linux-pm, linux-tegra, linux-arm-kernel, linux-kernel,
	bbasu, mperttunen

05.12.2019 05:51, Viresh Kumar пишет:
> On 04-12-19, 16:57, Dmitry Osipenko wrote:
>> 04.12.2019 14:27, Viresh Kumar wrote:
>>> On 04-12-19, 16:25, sumitg wrote:
>>>> In T194, the CCPLEX doesn't have access to set clocks; the
>>>> clk_{get|set}_rate() functions set clocks by hooking into the BPMP R5.
>>>> The CPU frequency can be set directly by the CCPLEX using an MSR
>>>> (NVFREQ_REQ_EL1). As DVFS runs on the BPMP, another MSR
>>>> (NVFREQ_FEEDBACK_EL1) is used to read the counters and calculate the
>>>> "actual" CPU frequency on the CCPLEX. So, the "cpuinfo_cur_freq" node
>>>> gives the actual CPU frequency, which is not what "scaling_cur_freq"
>>>> reports.
>>>
>>> Right, but why can't this be hidden in the CPU's clk driver instead,
>>> so cpufreq driver can just do clk_get_rate() and clk_set_rate() ?
>>
>> What about making use of dev_pm_opp_register_set_opp_helper()?
> 
> It has a different purpose where we have to play with different
> regulators. And that won't help with clk_get_rate() anyway.

Indeed that won't help with the clk.


* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver Sumit Gupta
  2019-12-04  5:40   ` Viresh Kumar
  2019-12-04 13:59   ` Dmitry Osipenko
@ 2019-12-05 14:15   ` Dmitry Osipenko
  2020-03-26 11:50   ` Viresh Kumar
  3 siblings, 0 replies; 40+ messages in thread
From: Dmitry Osipenko @ 2019-12-05 14:15 UTC (permalink / raw)
  To: Sumit Gupta, rjw, viresh.kumar, catalin.marinas, will,
	thierry.reding, jonathanh, talho, linux-pm, linux-tegra,
	linux-arm-kernel, linux-kernel
  Cc: bbasu, mperttunen

03.12.2019 20:32, Sumit Gupta wrote:
> Add support for CPU frequency scaling on Tegra194. The frequency
> of each core can be adjusted by writing a clock divisor value to
> an MSR on the core. The range of valid divisors is queried from
> the BPMP.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
> ---
>  drivers/cpufreq/Kconfig.arm        |   6 +
>  drivers/cpufreq/Makefile           |   1 +
>  drivers/cpufreq/tegra194-cpufreq.c | 423 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 430 insertions(+)
>  create mode 100644 drivers/cpufreq/tegra194-cpufreq.c
> 
> diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
> index a905796..4bcd47c 100644
> --- a/drivers/cpufreq/Kconfig.arm
> +++ b/drivers/cpufreq/Kconfig.arm
> @@ -320,6 +320,12 @@ config ARM_TEGRA186_CPUFREQ
>  	help
>  	  This adds the CPUFreq driver support for Tegra186 SOCs.
>  
> +config ARM_TEGRA194_CPUFREQ
> +	tristate "Tegra194 CPUFreq support"
> +	depends on ARCH_TEGRA && TEGRA_BPMP
> +	help
> +	  This adds CPU frequency driver support for Tegra194 SOCs.
> +
>  config ARM_TI_CPUFREQ
>  	bool "Texas Instruments CPUFreq support"
>  	depends on ARCH_OMAP2PLUS
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index 9a9f5cc..433d492 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -85,6 +85,7 @@ obj-$(CONFIG_ARM_TANGO_CPUFREQ)		+= tango-cpufreq.o
>  obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
>  obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
>  obj-$(CONFIG_ARM_TEGRA186_CPUFREQ)	+= tegra186-cpufreq.o
> +obj-$(CONFIG_ARM_TEGRA194_CPUFREQ)	+= tegra194-cpufreq.o
>  obj-$(CONFIG_ARM_TI_CPUFREQ)		+= ti-cpufreq.o
>  obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
>  
> diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
> new file mode 100644
> index 0000000..9df12f4
> --- /dev/null
> +++ b/drivers/cpufreq/tegra194-cpufreq.c
> @@ -0,0 +1,423 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/cpufreq.h>
> +#include <linux/delay.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/of_platform.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +
> +#include <asm/smp_plat.h>
> +
> +#include <soc/tegra/bpmp.h>
> +#include <soc/tegra/bpmp-abi.h>
> +
> +#define KHZ                     1000
> +#define REF_CLK_MHZ             408 /* 408 MHz */
> +#define US_DELAY                2000
> +#define US_DELAY_MIN            2
> +#define CPUFREQ_TBL_STEP_HZ     (50 * KHZ * KHZ)
> +#define MAX_CNT                 ~0U
> +
> +/* cpufreq transition latency */
> +#define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */
> +
> +enum cluster {
> +	CLUSTER0,
> +	CLUSTER1,
> +	CLUSTER2,
> +	CLUSTER3,
> +	MAX_CLUSTERS,
> +};
> +
> +struct tegra194_cpufreq_data {
> +	void __iomem *regs;
> +	size_t num_clusters;
> +	struct cpufreq_frequency_table **tables;
> +};
> +
> +static DEFINE_MUTEX(cpufreq_lock);
> +
> +struct tegra_cpu_ctr {
> +	u32 cpu;
> +	u32 delay;
> +	u32 coreclk_cnt, last_coreclk_cnt;
> +	u32 refclk_cnt, last_refclk_cnt;
> +};
> +
> +static struct workqueue_struct *read_counters_wq;
> +struct read_counters_work {
> +	struct work_struct work;
> +	struct tegra_cpu_ctr c;
> +};
> +
> +static enum cluster get_cpu_cluster(u8 cpu)
> +{
> +	return MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
> +}
> +
> +/*
> + * Read per-core Read-only system register NVFREQ_FEEDBACK_EL1.
> + * The register provides frequency feedback information to
> + * determine the average actual frequency a core has run at over
> + * a period of time.
> + *	[31:0] PLLP counter: Counts at fixed frequency (408 MHz)
> + *	[63:32] Core clock counter: counts on every core clock cycle
> + *			where the core is architecturally clocking
> + */
> +static u64 read_freq_feedback(void)
> +{
> +	u64 val = 0;
> +
> +	asm volatile("mrs %0, s3_0_c15_c0_5" : "=r" (val) : );
> +
> +	return val;
> +}
> +
> +u16 map_freq_to_ndiv(struct mrq_cpu_ndiv_limits_response *nltbl, u32 freq)
> +{
> +	return DIV_ROUND_UP(freq * nltbl->pdiv * nltbl->mdiv,
> +			    nltbl->ref_clk_hz / KHZ);
> +}
> +
> +static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response
> +				   *nltbl, u16 ndiv)
> +{
> +	return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv);
> +}
> +
> +static void tegra_read_counters(struct work_struct *work)
> +{
> +	struct read_counters_work *read_counters_work;
> +	struct tegra_cpu_ctr *c;
> +	u64 val;
> +
> +	/*
> +	 * ref_clk_counter(32 bit counter) runs on constant clk,
> +	 * pll_p(408MHz).
> +	 * It will take = 2 ^ 32 / 408 MHz to overflow ref clk counter
> +	 *              = 10526880 usec = 10.527 sec to overflow
> +	 *
> +	 * Likewise, core_clk_counter(32 bit counter) runs on the core clock.
> +	 * It's synchronized to crab_clk (cpu_crab_clk) which runs at
> +	 * freq of cluster. Assuming max cluster clock ~2000MHz,
> +	 * It will take = 2 ^ 32 / 2000 MHz to overflow core clk counter
> +	 *              = ~2.147 sec to overflow
> +	 */
> +	read_counters_work = container_of(work, struct read_counters_work,
> +					  work);
> +	c = &read_counters_work->c;
> +
> +	val = read_freq_feedback();
> +	c->last_refclk_cnt = lower_32_bits(val);
> +	c->last_coreclk_cnt = upper_32_bits(val);
> +	udelay(c->delay);
> +	val = read_freq_feedback();
> +	c->refclk_cnt = lower_32_bits(val);
> +	c->coreclk_cnt = upper_32_bits(val);
> +}
> +
> +/*
> + * Return instantaneous cpu speed
> + * Instantaneous freq is calculated as follows:
> + * - Take a sample on every query of the freq:
> + *	- Read the core and ref clock counters
> + *	- Delay for X us
> + *	- Read both cycle counters again
> + *	- Calculate the freq from the delta of the two counter reads,
> + *	  divided by the delay time (or its ref_clk_counter equivalent)
> + *	- Return Kcycles/second, i.e. the freq in KHz
> + *
> + *	delta time period = x sec
> + *			  = delta ref_clk_counter / (408 * 10^6) sec
> + *	freq in Hz = cycles/sec
> + *		   = delta cycles / x sec
> + *		   = (delta cycles * 408 * 10^6) / delta ref_clk_counter
> + *	in KHz	   = (delta cycles * 408 * 10^3) / delta ref_clk_counter
> + *
> + * @cpu - logical cpu whose freq to be updated
> + * Returns freq in KHz on success, 0 if cpu is offline
> + */
> +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
> +{
> +	struct read_counters_work read_counters_work;
> +	struct tegra_cpu_ctr c;
> +	u32 delta_refcnt;
> +	u32 delta_ccnt;
> +	u32 rate_mhz;
> +
> +	read_counters_work.c.cpu = cpu;
> +	read_counters_work.c.delay = delay;
> +	INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
> +	queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
> +	flush_work(&read_counters_work.work);

What will happen if the CPU is offline?
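
(For illustration, a minimal sketch of the kind of guard being asked
about; the wrapper name is hypothetical, and this assumes the cpufreq
core does not already filter out offline CPUs before calling ->get():)

static unsigned int tegra194_get_speed_checked(u32 cpu, u32 delay)
{
	/* honour the documented "0 if cpu is offline" contract before
	 * queueing work on that CPU
	 */
	if (!cpu_online(cpu))
		return 0;

	return tegra194_get_speed_common(cpu, delay);
}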

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-04 11:27       ` Viresh Kumar
  2019-12-04 13:57         ` Dmitry Osipenko
@ 2020-03-25 23:59         ` sumitg
  1 sibling, 0 replies; 40+ messages in thread
From: sumitg @ 2020-03-25 23:59 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen, sumitg

Hi Viresh,

Sorry for the late reply.

On 04/12/19 4:57 PM, Viresh Kumar wrote:
> On 04-12-19, 16:25, sumitg wrote:
>> In T194, CCPLEX doesn't have access to set clocks; the
>> clk_{get|set}_rate() functions set clocks by hooking into the BPMP R5.
>> CPU freq can be directly set by CCPLEX using MSR(NVFREQ_REQ_EL1).
>> As DVFS runs on BPMP, another MSR (NVFREQ_FEEDBACK_EL1) is
>> used to read the counters and calculate the "actual" cpu freq at CCPLEX.
>> So, the "cpuinfo_cur_freq" node gives the actual cpu frequency, which
>> the "scaling_cur_freq" node does not.
> Right, but why can't this be hidden in the CPU's clk driver instead,
> so cpufreq driver can just do clk_get_rate() and clk_set_rate() ?
>
>>> - populating cpufreq table, you can probably add OPPs instead using
>>>     the same mechanism
>> We are reading the available frequencies from BPMP to populate the
>> cpufreq table and are not using a static OPP table.
> Right, and a lot of other platforms read it from firmware (I believe BPMP
> is the firmware here) and create OPPs at runtime. Look at this for
> example:
>
> drivers/cpufreq/qcom-cpufreq-hw.c
>
> and search for dev_pm_opp_add().
>
- I think we don't need a separate CPU clock driver & to reuse the
  cpufreq-dt driver, as we would still have to replicate the same logic
  from the cpufreq driver in that dummy clock driver to calculate the
  actual cpufreq from the MSR value. So, it won't add much value.

    - "qcom-cpufreq-hw.c" is using clk_get_rate() during init, but
      the frequency ops "get/target_index" write to the register directly
      and don't use the clk APIs. Also, the clock driver from gcc-msm*.c
      seems to handle all clocks in CCPLEX.

      Tegra SoCs which didn't have BPMP had the clock handling done
      by CCPLEX. They were using the clk_{get|set}_rate() APIs as you
      mentioned. But in Tegra194, all clock handling is done within
      the BPMP R5 core except the CPU clock (which is through an MSR).

- Adding OPPs with dev_pm_opp_add() is also not required because:

    1) We don't have any consumer like the energy model or EAS in
       Tegra194, which it seems was the case with "qcom-cpufreq-hw.c".
       So, I think it won't be useful for T194.

    2) Also, there is no way to map ndiv to voltage in the kernel. The
       kernel driver passes the ndiv value to BPMP(R5), which converts
       it to a vhint.

Please share your inputs.
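
(As a hedged sketch of the split described above, using only the two
system registers named in this thread; this is illustrative, not the
actual driver code:)

/* NVFREQ_REQ_EL1 (s3_0_c15_c0_4): per-core ndiv frequency request.
 * NVFREQ_FEEDBACK_EL1 (s3_0_c15_c0_5): ref/core cycle counters used
 * to reconstruct the actual frequency. The ndiv-to-voltage mapping
 * itself stays inside the BPMP R5 firmware.
 */
static inline void tegra194_request_ndiv(u64 ndiv)
{
	asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv));
}

static inline u64 tegra194_read_feedback(void)
{
	u64 val;

	asm volatile("mrs %0, s3_0_c15_c0_5" : "=r" (val));
	return val;
}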


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver Sumit Gupta
                     ` (2 preceding siblings ...)
  2019-12-05 14:15   ` Dmitry Osipenko
@ 2020-03-26 11:50   ` Viresh Kumar
  2020-04-04 18:38     ` sumitg
  3 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2020-03-26 11:50 UTC (permalink / raw)
  To: Sumit Gupta
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On 03-12-19, 23:02, Sumit Gupta wrote:
> diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
> new file mode 100644
> index 0000000..9df12f4
> --- /dev/null
> +++ b/drivers/cpufreq/tegra194-cpufreq.c
> @@ -0,0 +1,423 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/cpufreq.h>
> +#include <linux/delay.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/of_platform.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +
> +#include <asm/smp_plat.h>
> +
> +#include <soc/tegra/bpmp.h>
> +#include <soc/tegra/bpmp-abi.h>
> +
> +#define KHZ                     1000
> +#define REF_CLK_MHZ             408 /* 408 MHz */
> +#define US_DELAY                2000
> +#define US_DELAY_MIN            2
> +#define CPUFREQ_TBL_STEP_HZ     (50 * KHZ * KHZ)
> +#define MAX_CNT                 ~0U
> +
> +/* cpufreq transition latency */
> +#define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */
> +
> +enum cluster {
> +	CLUSTER0,
> +	CLUSTER1,
> +	CLUSTER2,
> +	CLUSTER3,

All these have same CPUs ? Or big little kind of stuff ? How come they
have different frequency tables ?

> +	MAX_CLUSTERS,
> +};
> +
> +struct tegra194_cpufreq_data {
> +	void __iomem *regs;
> +	size_t num_clusters;
> +	struct cpufreq_frequency_table **tables;
> +};
> +
> +static DEFINE_MUTEX(cpufreq_lock);
> +
> +struct tegra_cpu_ctr {
> +	u32 cpu;
> +	u32 delay;
> +	u32 coreclk_cnt, last_coreclk_cnt;
> +	u32 refclk_cnt, last_refclk_cnt;
> +};
> +
> +static struct workqueue_struct *read_counters_wq;
> +struct read_counters_work {
> +	struct work_struct work;
> +	struct tegra_cpu_ctr c;
> +};
> +
> +static enum cluster get_cpu_cluster(u8 cpu)
> +{
> +	return MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
> +}
> +
> +/*
> + * Read per-core Read-only system register NVFREQ_FEEDBACK_EL1.
> + * The register provides frequency feedback information to
> + * determine the average actual frequency a core has run at over
> + * a period of time.
> + *	[31:0] PLLP counter: Counts at fixed frequency (408 MHz)
> + *	[63:32] Core clock counter: counts on every core clock cycle
> + *			where the core is architecturally clocking
> + */
> +static u64 read_freq_feedback(void)
> +{
> +	u64 val = 0;
> +
> +	asm volatile("mrs %0, s3_0_c15_c0_5" : "=r" (val) : );
> +
> +	return val;
> +}
> +
> +u16 map_freq_to_ndiv(struct mrq_cpu_ndiv_limits_response *nltbl, u32 freq)
> +{
> +	return DIV_ROUND_UP(freq * nltbl->pdiv * nltbl->mdiv,
> +			    nltbl->ref_clk_hz / KHZ);
> +}
> +
> +static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response
> +				   *nltbl, u16 ndiv)
> +{
> +	return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv);
> +}
> +
> +static void tegra_read_counters(struct work_struct *work)
> +{
> +	struct read_counters_work *read_counters_work;
> +	struct tegra_cpu_ctr *c;
> +	u64 val;
> +
> +	/*
> +	 * ref_clk_counter(32 bit counter) runs on constant clk,
> +	 * pll_p(408MHz).
> +	 * It will take = 2 ^ 32 / 408 MHz to overflow ref clk counter
> +	 *              = 10526880 usec = 10.527 sec to overflow
> +	 *
> +	 * Likewise, core_clk_counter(32 bit counter) runs on core clock.
> +	 * It's synchronized to crab_clk (cpu_crab_clk) which runs at
> +	 * freq of cluster. Assuming max cluster clock ~2000MHz,
> +	 * It will take = 2 ^ 32 / 2000 MHz to overflow core clk counter
> +	 *              = ~2.147 sec to overflow
> +	 */
> +	read_counters_work = container_of(work, struct read_counters_work,
> +					  work);
> +	c = &read_counters_work->c;
> +
> +	val = read_freq_feedback();
> +	c->last_refclk_cnt = lower_32_bits(val);
> +	c->last_coreclk_cnt = upper_32_bits(val);
> +	udelay(c->delay);
> +	val = read_freq_feedback();
> +	c->refclk_cnt = lower_32_bits(val);
> +	c->coreclk_cnt = upper_32_bits(val);
> +}
> +
> +/*
> + * Return instantaneous cpu speed
> + * Instantaneous freq is calculated as -
> + * -Takes sample on every query of getting the freq.
> + *	- Read core and ref clock counters;
> + *	- Delay for X us
> + *	- Read above cycle counters again
> + *	- Calculates freq by subtracting current and previous counters
> + *	  divided by the delay time or eqv. of ref_clk_counter in delta time
> + *	- Return Kcycles/second, freq in KHz
> + *
> + *	delta time period = x sec
> + *			  = delta ref_clk_counter / (408 * 10^6) sec
> + *	freq in Hz = cycles/sec
> + *		   = delta cycles / x sec
> + *		   = (delta cycles * 408 * 10^6) / delta ref_clk_counter
> + *	in KHz	   = (delta cycles * 408 * 10^3) / delta ref_clk_counter
> + *
> + * @cpu - logical cpu whose freq to be updated
> + * Returns freq in KHz on success, 0 if cpu is offline
> + */
> +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
> +{
> +	struct read_counters_work read_counters_work;
> +	struct tegra_cpu_ctr c;
> +	u32 delta_refcnt;
> +	u32 delta_ccnt;
> +	u32 rate_mhz;
> +
> +	read_counters_work.c.cpu = cpu;
> +	read_counters_work.c.delay = delay;
> +	INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
> +	queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
> +	flush_work(&read_counters_work.work);

Why can't this be done in current context ?

> +	c = read_counters_work.c;
> +
> +	if (c.coreclk_cnt < c.last_coreclk_cnt)
> +		delta_ccnt = c.coreclk_cnt + (MAX_CNT - c.last_coreclk_cnt);
> +	else
> +		delta_ccnt = c.coreclk_cnt - c.last_coreclk_cnt;
> +	if (!delta_ccnt)
> +		return 0;
> +
> +	/* ref clock is 32 bits */
> +	if (c.refclk_cnt < c.last_refclk_cnt)
> +		delta_refcnt = c.refclk_cnt + (MAX_CNT - c.last_refclk_cnt);
> +	else
> +		delta_refcnt = c.refclk_cnt - c.last_refclk_cnt;
> +	if (!delta_refcnt) {
> +		pr_debug("cpufreq: %d is idle, delta_refcnt: 0\n", cpu);
> +		return 0;
> +	}
> +	rate_mhz = ((unsigned long)(delta_ccnt * REF_CLK_MHZ)) / delta_refcnt;
> +
> +	return (rate_mhz * KHZ); /* in KHz */
> +}
> +
> +static unsigned int tegra194_get_speed(u32 cpu)
> +{
> +	return tegra194_get_speed_common(cpu, US_DELAY);
> +}
> +
> +static unsigned int tegra194_fast_get_speed(u32 cpu)
> +{
> +	return tegra194_get_speed_common(cpu, US_DELAY_MIN);

Why is this required specially here ? Why can't you work with normal
delay ?

> +}
> +
> +static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
> +{
> +	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
> +	int cluster = get_cpu_cluster(policy->cpu);
> +
> +	if (cluster >= data->num_clusters)
> +		return -EINVAL;
> +
> +	policy->cur = tegra194_fast_get_speed(policy->cpu); /* boot freq */
> +
> +	/* set same policy for all cpus */
> +	cpumask_copy(policy->cpus, cpu_possible_mask);

You are copying cpu_possible_mask here, and so this routine will
get called only once.

I still don't understand the logic behind clusters and frequency
tables.

> +
> +	policy->freq_table = data->tables[cluster];
> +	policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY;
> +
> +	return 0;
> +}
> +
> +static void set_cpu_ndiv(void *data)
> +{
> +	struct cpufreq_frequency_table *tbl = data;
> +	u64 ndiv_val = (u64)tbl->driver_data;
> +
> +	asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv_val));
> +}
> +
> +static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
> +				       unsigned int index)
> +{
> +	struct cpufreq_frequency_table *tbl = policy->freq_table + index;
> +	static struct cpufreq_freqs freqs;
> +
> +	mutex_lock(&cpufreq_lock);

No need of lock here.

> +	freqs.old = policy->cur;
> +	freqs.new = tbl->frequency;
> +
> +	cpufreq_freq_transition_begin(policy, &freqs);
> +	on_each_cpu_mask(policy->cpus, set_cpu_ndiv, tbl, true);

When CPUs share clock line, why is this required for every CPU ?

> +	cpufreq_freq_transition_end(policy, &freqs, 0);
> +
> +	mutex_unlock(&cpufreq_lock);
> +
> +	return 0;
> +}
> +
> +static struct cpufreq_driver tegra194_cpufreq_driver = {
> +	.name = "tegra194",
> +	.flags = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS |
> +		CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_ASYNC_NOTIFICATION,

Why Async here ? I am really confused if I am not able to understand
the driver or you :)

> +	.verify = cpufreq_generic_frequency_table_verify,
> +	.target_index = tegra194_cpufreq_set_target,
> +	.get = tegra194_get_speed,
> +	.init = tegra194_cpufreq_init,
> +	.attr = cpufreq_generic_attr,
> +};
> +
> +static void tegra194_cpufreq_free_resources(void)
> +{
> +	flush_workqueue(read_counters_wq);
> +	destroy_workqueue(read_counters_wq);
> +}
> +
> +static struct cpufreq_frequency_table *init_freq_table

Don't break the line here; rather, break after the * above.

> +		(struct platform_device *pdev, struct tegra_bpmp *bpmp,
> +		 unsigned int cluster_id)
> +{
> +	struct cpufreq_frequency_table *opp_table;

Please name it freq_table :)

> +	struct mrq_cpu_ndiv_limits_response resp;
> +	unsigned int num_freqs, ndiv, delta_ndiv;
> +	struct mrq_cpu_ndiv_limits_request req;
> +	struct tegra_bpmp_message msg;
> +	u16 freq_table_step_size;
> +	int err, index;
> +
> +	memset(&req, 0, sizeof(req));
> +	req.cluster_id = cluster_id;
> +
> +	memset(&msg, 0, sizeof(msg));
> +	msg.mrq = MRQ_CPU_NDIV_LIMITS;
> +	msg.tx.data = &req;
> +	msg.tx.size = sizeof(req);
> +	msg.rx.data = &resp;
> +	msg.rx.size = sizeof(resp);
> +
> +	err = tegra_bpmp_transfer(bpmp, &msg);
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	/*
> +	 * Make sure frequency table step is a multiple of mdiv to match
> +	 * vhint table granularity.
> +	 */
> +	freq_table_step_size = resp.mdiv *
> +			DIV_ROUND_UP(CPUFREQ_TBL_STEP_HZ, resp.ref_clk_hz);
> +
> +	dev_dbg(&pdev->dev, "cluster %d: frequency table step size: %d\n",
> +		cluster_id, freq_table_step_size);
> +
> +	delta_ndiv = resp.ndiv_max - resp.ndiv_min;
> +
> +	if (unlikely(delta_ndiv == 0))
> +		num_freqs = 1;
> +	else
> +		/* We store both ndiv_min and ndiv_max hence the +1 */
> +		num_freqs = delta_ndiv / freq_table_step_size + 1;
> +
> +	num_freqs += (delta_ndiv % freq_table_step_size) ? 1 : 0;
> +
> +	opp_table = devm_kcalloc(&pdev->dev, num_freqs + 1, sizeof(*opp_table),
> +				 GFP_KERNEL);
> +	if (!opp_table)
> +		return ERR_PTR(-ENOMEM);
> +
> +	for (index = 0, ndiv = resp.ndiv_min;
> +			ndiv < resp.ndiv_max;
> +			index++, ndiv += freq_table_step_size) {
> +		opp_table[index].driver_data = ndiv;
> +		opp_table[index].frequency = map_ndiv_to_freq(&resp, ndiv);
> +	}
> +
> +	opp_table[index].driver_data = resp.ndiv_max;
> +	opp_table[index++].frequency = map_ndiv_to_freq(&resp, resp.ndiv_max);
> +	opp_table[index].frequency = CPUFREQ_TABLE_END;
> +
> +	return opp_table;
> +}
> +
> +static int tegra194_cpufreq_probe(struct platform_device *pdev)
> +{
> +	struct tegra194_cpufreq_data *data;
> +	struct tegra_bpmp *bpmp;
> +	int err, i;
> +
> +	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
> +	if (!data)
> +		return -ENOMEM;
> +
> +	data->num_clusters = MAX_CLUSTERS;
> +	data->tables = devm_kcalloc(&pdev->dev, data->num_clusters,
> +				    sizeof(*data->tables), GFP_KERNEL);
> +	if (!data->tables)
> +		return -ENOMEM;
> +
> +	platform_set_drvdata(pdev, data);
> +
> +	read_counters_wq = alloc_workqueue("read_counters_wq", __WQ_LEGACY, 1);
> +	if (!read_counters_wq) {
> +		dev_err(&pdev->dev, "fail to create_workqueue\n");
> +		return -EINVAL;
> +	}
> +
> +	bpmp = of_tegra_bpmp_get();
> +	if (IS_ERR(bpmp)) {
> +		err = PTR_ERR(bpmp);
> +		goto err_free_res;
> +	}
> +
> +	for (i = 0; i < data->num_clusters; i++) {
> +		data->tables[i] = init_freq_table(pdev, bpmp, i);
> +		if (IS_ERR(data->tables[i])) {
> +			err = PTR_ERR(data->tables[i]);
> +			goto put_bpmp;
> +		}
> +	}
> +
> +	tegra_bpmp_put(bpmp);
> +
> +	tegra194_cpufreq_driver.driver_data = data;
> +
> +	err = cpufreq_register_driver(&tegra194_cpufreq_driver);
> +	if (err)
> +		goto err_free_res;
> +
> +	return err;
> +
> +put_bpmp:
> +	tegra_bpmp_put(bpmp);
> +err_free_res:
> +	tegra194_cpufreq_free_resources();
> +	return err;
> +}
> +
> +static int tegra194_cpufreq_remove(struct platform_device *pdev)
> +{
> +	cpufreq_unregister_driver(&tegra194_cpufreq_driver);
> +	tegra194_cpufreq_free_resources();
> +
> +	return 0;
> +}
> +
> +static struct platform_driver tegra194_cpufreq_platform_driver = {
> +	.driver = {
> +		.name = "tegra194-cpufreq",
> +	},
> +	.probe = tegra194_cpufreq_probe,
> +	.remove = tegra194_cpufreq_remove,
> +};
> +
> +static int __init tegra_cpufreq_init(void)

I seem to be forgetting this, but should we use __init with modules or
not ?

-- 
viresh

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-03-26 11:50   ` Viresh Kumar
@ 2020-04-04 18:38     ` sumitg
  2020-04-06  2:55       ` Viresh Kumar
  0 siblings, 1 reply; 40+ messages in thread
From: sumitg @ 2020-04-04 18:38 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen, sumitg



On 26/03/20 5:20 PM, Viresh Kumar wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 03-12-19, 23:02, Sumit Gupta wrote:
>> diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
>> new file mode 100644
>> index 0000000..9df12f4
>> --- /dev/null
>> +++ b/drivers/cpufreq/tegra194-cpufreq.c
>> @@ -0,0 +1,423 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved
>> + */
>> +
>> +#include <linux/cpu.h>
>> +#include <linux/cpufreq.h>
>> +#include <linux/delay.h>
>> +#include <linux/dma-mapping.h>
>> +#include <linux/module.h>
>> +#include <linux/of.h>
>> +#include <linux/of_platform.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/slab.h>
>> +
>> +#include <asm/smp_plat.h>
>> +
>> +#include <soc/tegra/bpmp.h>
>> +#include <soc/tegra/bpmp-abi.h>
>> +
>> +#define KHZ                     1000
>> +#define REF_CLK_MHZ             408 /* 408 MHz */
>> +#define US_DELAY                2000
>> +#define US_DELAY_MIN            2
>> +#define CPUFREQ_TBL_STEP_HZ     (50 * KHZ * KHZ)
>> +#define MAX_CNT                 ~0U
>> +
>> +/* cpufreq transition latency */
>> +#define TEGRA_CPUFREQ_TRANSITION_LATENCY (300 * 1000) /* unit in nanoseconds */
>> +
>> +enum cluster {
>> +     CLUSTER0,
>> +     CLUSTER1,
>> +     CLUSTER2,
>> +     CLUSTER3,
> 
> All these have same CPUs ? Or big little kind of stuff ? How come they
> have different frequency tables ?
> 
The T194 SoC has a homogeneous architecture where each cluster has two
symmetric Carmel cores; it is not big.LITTLE. LUTs are per cluster, and
all LUTs currently have the same values. Future SoCs may have different
LUT values per cluster.

>> +     MAX_CLUSTERS,
>> +};
>> +
>> +struct tegra194_cpufreq_data {
>> +     void __iomem *regs;
>> +     size_t num_clusters;
>> +     struct cpufreq_frequency_table **tables;
>> +};
>> +
>> +static DEFINE_MUTEX(cpufreq_lock);
>> +
>> +struct tegra_cpu_ctr {
>> +     u32 cpu;
>> +     u32 delay;
>> +     u32 coreclk_cnt, last_coreclk_cnt;
>> +     u32 refclk_cnt, last_refclk_cnt;
>> +};
>> +
>> +static struct workqueue_struct *read_counters_wq;
>> +struct read_counters_work {
>> +     struct work_struct work;
>> +     struct tegra_cpu_ctr c;
>> +};
>> +
>> +static enum cluster get_cpu_cluster(u8 cpu)
>> +{
>> +     return MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
>> +}
>> +
>> +/*
>> + * Read per-core Read-only system register NVFREQ_FEEDBACK_EL1.
>> + * The register provides frequency feedback information to
>> + * determine the average actual frequency a core has run at over
>> + * a period of time.
>> + *   [31:0] PLLP counter: Counts at fixed frequency (408 MHz)
>> + *   [63:32] Core clock counter: counts on every core clock cycle
>> + *                   where the core is architecturally clocking
>> + */
>> +static u64 read_freq_feedback(void)
>> +{
>> +     u64 val = 0;
>> +
>> +     asm volatile("mrs %0, s3_0_c15_c0_5" : "=r" (val) : );
>> +
>> +     return val;
>> +}
>> +
>> +u16 map_freq_to_ndiv(struct mrq_cpu_ndiv_limits_response *nltbl, u32 freq)
>> +{
>> +     return DIV_ROUND_UP(freq * nltbl->pdiv * nltbl->mdiv,
>> +                         nltbl->ref_clk_hz / KHZ);
>> +}
>> +
>> +static inline u32 map_ndiv_to_freq(struct mrq_cpu_ndiv_limits_response
>> +                                *nltbl, u16 ndiv)
>> +{
>> +     return nltbl->ref_clk_hz / KHZ * ndiv / (nltbl->pdiv * nltbl->mdiv);
>> +}
>> +
>> +static void tegra_read_counters(struct work_struct *work)
>> +{
>> +     struct read_counters_work *read_counters_work;
>> +     struct tegra_cpu_ctr *c;
>> +     u64 val;
>> +
>> +     /*
>> +      * ref_clk_counter(32 bit counter) runs on constant clk,
>> +      * pll_p(408MHz).
>> +      * It will take = 2 ^ 32 / 408 MHz to overflow ref clk counter
>> +      *              = 10526880 usec = 10.527 sec to overflow
>> +      *
>> +      * Likewise, core_clk_counter(32 bit counter) runs on core clock.
>> +      * It's synchronized to crab_clk (cpu_crab_clk) which runs at
>> +      * freq of cluster. Assuming max cluster clock ~2000MHz,
>> +      * It will take = 2 ^ 32 / 2000 MHz to overflow core clk counter
>> +      *              = ~2.147 sec to overflow
>> +      */
>> +     read_counters_work = container_of(work, struct read_counters_work,
>> +                                       work);
>> +     c = &read_counters_work->c;
>> +
>> +     val = read_freq_feedback();
>> +     c->last_refclk_cnt = lower_32_bits(val);
>> +     c->last_coreclk_cnt = upper_32_bits(val);
>> +     udelay(c->delay);
>> +     val = read_freq_feedback();
>> +     c->refclk_cnt = lower_32_bits(val);
>> +     c->coreclk_cnt = upper_32_bits(val);
>> +}
>> +
>> +/*
>> + * Return instantaneous cpu speed
>> + * Instantaneous freq is calculated as -
>> + * -Takes sample on every query of getting the freq.
>> + *   - Read core and ref clock counters;
>> + *   - Delay for X us
>> + *   - Read above cycle counters again
>> + *   - Calculates freq by subtracting current and previous counters
>> + *     divided by the delay time or eqv. of ref_clk_counter in delta time
>> + *   - Return Kcycles/second, freq in KHz
>> + *
>> + *   delta time period = x sec
>> + *                     = delta ref_clk_counter / (408 * 10^6) sec
>> + *   freq in Hz = cycles/sec
>> + *              = delta cycles / x sec
>> + *              = (delta cycles * 408 * 10^6) / delta ref_clk_counter
>> + *   in KHz     = (delta cycles * 408 * 10^3) / delta ref_clk_counter
>> + *
>> + * @cpu - logical cpu whose freq to be updated
>> + * Returns freq in KHz on success, 0 if cpu is offline
>> + */
>> +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
>> +{
>> +     struct read_counters_work read_counters_work;
>> +     struct tegra_cpu_ctr c;
>> +     u32 delta_refcnt;
>> +     u32 delta_ccnt;
>> +     u32 rate_mhz;
>> +
>> +     read_counters_work.c.cpu = cpu;
>> +     read_counters_work.c.delay = delay;
>> +     INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
>> +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
>> +     flush_work(&read_counters_work.work);
> 
> Why can't this be done in current context ?
> 
We used a workqueue instead of smp_call_function_single() to allow a long delay.

>> +     c = read_counters_work.c;
>> +
>> +     if (c.coreclk_cnt < c.last_coreclk_cnt)
>> +             delta_ccnt = c.coreclk_cnt + (MAX_CNT - c.last_coreclk_cnt);
>> +     else
>> +             delta_ccnt = c.coreclk_cnt - c.last_coreclk_cnt;
>> +     if (!delta_ccnt)
>> +             return 0;
>> +
>> +     /* ref clock is 32 bits */
>> +     if (c.refclk_cnt < c.last_refclk_cnt)
>> +             delta_refcnt = c.refclk_cnt + (MAX_CNT - c.last_refclk_cnt);
>> +     else
>> +             delta_refcnt = c.refclk_cnt - c.last_refclk_cnt;
>> +     if (!delta_refcnt) {
>> +             pr_debug("cpufreq: %d is idle, delta_refcnt: 0\n", cpu);
>> +             return 0;
>> +     }
>> +     rate_mhz = ((unsigned long)(delta_ccnt * REF_CLK_MHZ)) / delta_refcnt;
>> +
>> +     return (rate_mhz * KHZ); /* in KHz */
>> +}
>> +
>> +static unsigned int tegra194_get_speed(u32 cpu)
>> +{
>> +     return tegra194_get_speed_common(cpu, US_DELAY);
>> +}
>> +
>> +static unsigned int tegra194_fast_get_speed(u32 cpu)
>> +{
>> +     return tegra194_get_speed_common(cpu, US_DELAY_MIN);
> 
> Why is this required specially here ? Why can't you work with normal
> delay ?
> 
A smaller delay is used during init to reduce CPU boot time.

>> +}
>> +
>> +static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
>> +{
>> +     struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
>> +     int cluster = get_cpu_cluster(policy->cpu);
>> +
>> +     if (cluster >= data->num_clusters)
>> +             return -EINVAL;
>> +
>> +     policy->cur = tegra194_fast_get_speed(policy->cpu); /* boot freq */
>> +
>> +     /* set same policy for all cpus */
>> +     cpumask_copy(policy->cpus, cpu_possible_mask);
> 
> You are copying cpu_possible_mask here, and so this routine will
> get called only once.
> 
> I still don't understand the logic behind clusters and frequency
> tables.
> 
Currently, we use the same policy for all CPUs to maximize throughput. A
separate patch to set the policy per cluster will be added later. But we
are not using that in T194 due to perf reasons.

>> +
>> +     policy->freq_table = data->tables[cluster];
>> +     policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY;
>> +
>> +     return 0;
>> +}
>> +
>> +static void set_cpu_ndiv(void *data)
>> +{
>> +     struct cpufreq_frequency_table *tbl = data;
>> +     u64 ndiv_val = (u64)tbl->driver_data;
>> +
>> +     asm volatile("msr s3_0_c15_c0_4, %0" : : "r" (ndiv_val));
>> +}
>> +
>> +static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
>> +                                    unsigned int index)
>> +{
>> +     struct cpufreq_frequency_table *tbl = policy->freq_table + index;
>> +     static struct cpufreq_freqs freqs;
>> +
>> +     mutex_lock(&cpufreq_lock);
> 
> No need of lock here.
>
Done

>> +     freqs.old = policy->cur;
>> +     freqs.new = tbl->frequency;
>> +
>> +     cpufreq_freq_transition_begin(policy, &freqs);
>> +     on_each_cpu_mask(policy->cpus, set_cpu_ndiv, tbl, true);
> 
> When CPUs share clock line, why is this required for every CPU ?
The per-core NVFREQ_REQ system register is written to make the frequency
request for that core. Cluster h/w then forwards the max(core0, core1)
request to the cluster NAFLL.
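
(A concrete example of that max() behaviour, with illustrative numbers:)

/* If core0 has requested an ndiv equivalent to 1200 MHz and core1 one
 * equivalent to 2000 MHz, the cluster h/w forwards max(1200, 2000) =
 * 2000 MHz to the cluster NAFLL. Writing the request on every CPU in
 * the policy ensures no core keeps a stale higher request that would
 * pin the whole cluster.
 */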

>> +     cpufreq_freq_transition_end(policy, &freqs, 0);
>> +
>> +     mutex_unlock(&cpufreq_lock);
>> +
>> +     return 0;
>> +}
>> +
>> +static struct cpufreq_driver tegra194_cpufreq_driver = {
>> +     .name = "tegra194",
>> +     .flags = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS |
>> +             CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_ASYNC_NOTIFICATION,
> 
> Why Async here ? I am really confused if I am not able to understand
> the driver or you :)
> 
Have removed the flag as it was not required. Thank you for pointing it out.

>> +     .verify = cpufreq_generic_frequency_table_verify,
>> +     .target_index = tegra194_cpufreq_set_target,
>> +     .get = tegra194_get_speed,
>> +     .init = tegra194_cpufreq_init,
>> +     .attr = cpufreq_generic_attr,
>> +};
>> +
>> +static void tegra194_cpufreq_free_resources(void)
>> +{
>> +     flush_workqueue(read_counters_wq);
>> +     destroy_workqueue(read_counters_wq);
>> +}
>> +
>> +static struct cpufreq_frequency_table *init_freq_table
> 
> Don't break the line here; rather, break after the * above.
> 
Done

>> +             (struct platform_device *pdev, struct tegra_bpmp *bpmp,
>> +              unsigned int cluster_id)
>> +{
>> +     struct cpufreq_frequency_table *opp_table;
> 
> Please name it freq_table :)
> 
Done

>> +     struct mrq_cpu_ndiv_limits_response resp;
>> +     unsigned int num_freqs, ndiv, delta_ndiv;
>> +     struct mrq_cpu_ndiv_limits_request req;
>> +     struct tegra_bpmp_message msg;
>> +     u16 freq_table_step_size;
>> +     int err, index;
>> +
>> +     memset(&req, 0, sizeof(req));
>> +     req.cluster_id = cluster_id;
>> +
>> +     memset(&msg, 0, sizeof(msg));
>> +     msg.mrq = MRQ_CPU_NDIV_LIMITS;
>> +     msg.tx.data = &req;
>> +     msg.tx.size = sizeof(req);
>> +     msg.rx.data = &resp;
>> +     msg.rx.size = sizeof(resp);
>> +
>> +     err = tegra_bpmp_transfer(bpmp, &msg);
>> +     if (err)
>> +             return ERR_PTR(err);
>> +
>> +     /*
>> +      * Make sure frequency table step is a multiple of mdiv to match
>> +      * vhint table granularity.
>> +      */
>> +     freq_table_step_size = resp.mdiv *
>> +                     DIV_ROUND_UP(CPUFREQ_TBL_STEP_HZ, resp.ref_clk_hz);
>> +
>> +     dev_dbg(&pdev->dev, "cluster %d: frequency table step size: %d\n",
>> +             cluster_id, freq_table_step_size);
>> +
>> +     delta_ndiv = resp.ndiv_max - resp.ndiv_min;
>> +
>> +     if (unlikely(delta_ndiv == 0))
>> +             num_freqs = 1;
>> +     else
>> +             /* We store both ndiv_min and ndiv_max hence the +1 */
>> +             num_freqs = delta_ndiv / freq_table_step_size + 1;
>> +
>> +     num_freqs += (delta_ndiv % freq_table_step_size) ? 1 : 0;
>> +
>> +     opp_table = devm_kcalloc(&pdev->dev, num_freqs + 1, sizeof(*opp_table),
>> +                              GFP_KERNEL);
>> +     if (!opp_table)
>> +             return ERR_PTR(-ENOMEM);
>> +
>> +     for (index = 0, ndiv = resp.ndiv_min;
>> +                     ndiv < resp.ndiv_max;
>> +                     index++, ndiv += freq_table_step_size) {
>> +             opp_table[index].driver_data = ndiv;
>> +             opp_table[index].frequency = map_ndiv_to_freq(&resp, ndiv);
>> +     }
>> +
>> +     opp_table[index].driver_data = resp.ndiv_max;
>> +     opp_table[index++].frequency = map_ndiv_to_freq(&resp, resp.ndiv_max);
>> +     opp_table[index].frequency = CPUFREQ_TABLE_END;
>> +
>> +     return opp_table;
>> +}
>> +
>> +static int tegra194_cpufreq_probe(struct platform_device *pdev)
>> +{
>> +     struct tegra194_cpufreq_data *data;
>> +     struct tegra_bpmp *bpmp;
>> +     int err, i;
>> +
>> +     data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
>> +     if (!data)
>> +             return -ENOMEM;
>> +
>> +     data->num_clusters = MAX_CLUSTERS;
>> +     data->tables = devm_kcalloc(&pdev->dev, data->num_clusters,
>> +                                 sizeof(*data->tables), GFP_KERNEL);
>> +     if (!data->tables)
>> +             return -ENOMEM;
>> +
>> +     platform_set_drvdata(pdev, data);
>> +
>> +     read_counters_wq = alloc_workqueue("read_counters_wq", __WQ_LEGACY, 1);
>> +     if (!read_counters_wq) {
>> +             dev_err(&pdev->dev, "fail to create_workqueue\n");
>> +             return -EINVAL;
>> +     }
>> +
>> +     bpmp = of_tegra_bpmp_get();
>> +     if (IS_ERR(bpmp)) {
>> +             err = PTR_ERR(bpmp);
>> +             goto err_free_res;
>> +     }
>> +
>> +     for (i = 0; i < data->num_clusters; i++) {
>> +             data->tables[i] = init_freq_table(pdev, bpmp, i);
>> +             if (IS_ERR(data->tables[i])) {
>> +                     err = PTR_ERR(data->tables[i]);
>> +                     goto put_bpmp;
>> +             }
>> +     }
>> +
>> +     tegra_bpmp_put(bpmp);
>> +
>> +     tegra194_cpufreq_driver.driver_data = data;
>> +
>> +     err = cpufreq_register_driver(&tegra194_cpufreq_driver);
>> +     if (err)
>> +             goto err_free_res;
>> +
>> +     return err;
>> +
>> +put_bpmp:
>> +     tegra_bpmp_put(bpmp);
>> +err_free_res:
>> +     tegra194_cpufreq_free_resources();
>> +     return err;
>> +}
>> +
>> +static int tegra194_cpufreq_remove(struct platform_device *pdev)
>> +{
>> +     cpufreq_unregister_driver(&tegra194_cpufreq_driver);
>> +     tegra194_cpufreq_free_resources();
>> +
>> +     return 0;
>> +}
>> +
>> +static struct platform_driver tegra194_cpufreq_platform_driver = {
>> +     .driver = {
>> +             .name = "tegra194-cpufreq",
>> +     },
>> +     .probe = tegra194_cpufreq_probe,
>> +     .remove = tegra194_cpufreq_remove,
>> +};
>> +
>> +static int __init tegra_cpufreq_init(void)
> 
> I seem to be forgetting this, but should we use __init with modules or
> not ?
> 
Yes
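
(For reference, a minimal sketch of the registration boilerplate in
question; for modules, code marked __init is discarded once module
loading finishes, so using it here is fine:)

static int __init tegra_cpufreq_init(void)
{
	return platform_driver_register(&tegra194_cpufreq_platform_driver);
}
module_init(tegra_cpufreq_init);

static void __exit tegra_cpufreq_exit(void)
{
	platform_driver_unregister(&tegra194_cpufreq_platform_driver);
}
module_exit(tegra_cpufreq_exit);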

Thank you for the review. I will send v2 with the suggested changes.

> --
> viresh
> 


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-04 18:38     ` sumitg
@ 2020-04-06  2:55       ` Viresh Kumar
  2020-04-07 18:18         ` sumitg
  0 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2020-04-06  2:55 UTC (permalink / raw)
  To: sumitg
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On 05-04-20, 00:08, sumitg wrote:
> 
> 
> On 26/03/20 5:20 PM, Viresh Kumar wrote:
> > On 03-12-19, 23:02, Sumit Gupta wrote:
> > > diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
> > > +enum cluster {
> > > +     CLUSTER0,
> > > +     CLUSTER1,
> > > +     CLUSTER2,
> > > +     CLUSTER3,
> > 
> > All these have same CPUs ? Or big little kind of stuff ? How come they
> > have different frequency tables ?
> > 
> The T194 SoC has a homogeneous architecture where each cluster has two
> symmetric Carmel cores; it is not big.LITTLE. LUTs are per cluster, and
> all LUTs currently have the same values. Future SoCs may have different
> LUT values per cluster.

LUT ?

> > > +     MAX_CLUSTERS,
> > > +};

> > > +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
> > > +{
> > > +     struct read_counters_work read_counters_work;
> > > +     struct tegra_cpu_ctr c;
> > > +     u32 delta_refcnt;
> > > +     u32 delta_ccnt;
> > > +     u32 rate_mhz;
> > > +
> > > +     read_counters_work.c.cpu = cpu;
> > > +     read_counters_work.c.delay = delay;
> > > +     INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
> > > +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
> > > +     flush_work(&read_counters_work.work);
> > 
> > Why can't this be done in current context ?
> > 
> We used a workqueue instead of smp_call_function_single() to allow a long delay.

Please explain completely, you have raised more questions than you
answered :)

Why do you want to have long delays ?

> > > +static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
> > > +{
> > > +     struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
> > > +     int cluster = get_cpu_cluster(policy->cpu);
> > > +
> > > +     if (cluster >= data->num_clusters)
> > > +             return -EINVAL;
> > > +
> > > +     policy->cur = tegra194_fast_get_speed(policy->cpu); /* boot freq */
> > > +
> > > +     /* set same policy for all cpus */
> > > +     cpumask_copy(policy->cpus, cpu_possible_mask);
> > 
> > You are copying cpu_possible_mask here, and so this routine will
> > get called only once.
> > 
> > I still don't understand the logic behind clusters and frequency
> > tables.
> > 
> Currently, we use the same policy for all CPUs to maximize throughput. A
> separate patch to set the policy per cluster will be added later. But we
> are not using that in T194 due to perf reasons.

You can't misrepresent the hardware this way, sorry.

Again few questions, I understand that you have multiple clusters
here:

- All clusters will always have the frequency table ?
- All clusters are capable of having a different frequency at any
  point of time ? Or they will always run at same freq ?

> > > +     freqs.old = policy->cur;
> > > +     freqs.new = tbl->frequency;
> > > +
> > > +     cpufreq_freq_transition_begin(policy, &freqs);
> > > +     on_each_cpu_mask(policy->cpus, set_cpu_ndiv, tbl, true);
> > 
> > When CPUs share clock line, why is this required for every CPU ?
> > Per core NVFREQ_REQ system register is written to make frequency
> requests for the core. Cluster h/w then forwards the max(core0, core1)
> request to cluster NAFLL.

You mean that each cluster can set frequency independently ?

If all the clusters can run at independent frequencies but you still
want them to run at same frequency, then that can be done with a
single set of governor tunables, which is the default anyway. So this
should just work for you.

I will not be reviewing the v2 version you sent for now as that is
most likely wrong anyway.

-- 
viresh

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2019-12-04  9:51         ` Viresh Kumar
@ 2020-04-07 10:05           ` Thierry Reding
  2020-04-27  7:18             ` Thierry Reding
  2020-05-20 14:43             ` Rob Herring
  0 siblings, 2 replies; 40+ messages in thread
From: Thierry Reding @ 2020-04-07 10:05 UTC (permalink / raw)
  To: Rob Herring, Viresh Kumar, Rafael J. Wysocki
  Cc: Mikko Perttunen, Sumit Gupta, catalin.marinas, will, jonathanh,
	talho, linux-pm, linux-tegra, linux-arm-kernel, linux-kernel,
	bbasu, mperttunen, devicetree

[-- Attachment #1: Type: text/plain, Size: 3505 bytes --]

On Wed, Dec 04, 2019 at 03:21:38PM +0530, Viresh Kumar wrote:
> On 04-12-19, 10:33, Thierry Reding wrote:
> > Yeah, the code that registers this device is in drivers/base/cpu.c in
> > register_cpu(). It even retrieves the device tree node for the CPU from
> > device tree and stores it in cpu->dev.of_node, so we should be able to
> > just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
> > to the BPMP.
> > 
> > That said, I'm wondering if perhaps we could just add a compatible
> > string to the /cpus node for cases like this where we don't have an
> > actual device representing the CPU complex. There are a number of CPU
> > frequency drivers that register dummy devices just so that they have
> > something to bind a driver to.
> > 
> > If we allow the /cpus node to represent the CPU complex (if no other
> > "device" does that yet), we can add a compatible string and have the
> > cpufreq driver match on that.
> > 
> > Of course this would be slightly difficult to retrofit into existing
> > drivers because they'd need to remain backwards compatible with existing
> > device trees. But it would allow future drivers to do this a little more
> > elegantly. For some SoCs this may not matter, but especially once you
> > start depending on additional resources this would come in handy.
> > 
> > Adding Rob and the device tree mailing list for feedback on this idea.
> 
> Took some time to find this thread, but something around this was
> suggested by Rafael earlier.
> 
> https://lore.kernel.org/lkml/8139001.Q4eV8YG1Il@vostro.rjw.lan/

I gave this a try and came up with the following:

--- >8 ---
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index f4ede86e32b4..e4462f95f0b3 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1764,6 +1764,9 @@ bpmp_thermal: thermal {
 	};
 
 	cpus {
+		compatible = "nvidia,tegra194-ccplex";
+		nvidia,bpmp = <&bpmp>;
+
 		#address-cells = <1>;
 		#size-cells = <0>;
 
--- >8 ---

Now I can do something roughly like this, although I have a more complete
patch locally that also gets rid of all the global variables because we
now actually have a struct platform_device that we can anchor everything
at:

--- >8 ---
static const struct of_device_id tegra194_cpufreq_of_match[] = {
	{ .compatible = "nvidia,tegra194-ccplex", },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match);

static struct platform_driver tegra194_ccplex_driver = {
	.driver = {
		.name = "tegra194-cpufreq",
		.of_match_table = tegra194_cpufreq_of_match,
	},
	.probe = tegra194_cpufreq_probe,
	.remove = tegra194_cpufreq_remove,
};
module_platform_driver(tegra194_ccplex_driver);
--- >8 ---

I don't think that's exactly what Rafael (Cc'ed) had in mind, since the
above thread seems to have mostly talked about binding a driver to each
individual CPU.

But this seems a lot better than having to instantiate a device from
scratch just so that a driver can bind to it and it allows additional
properties to be associated with the CCPLEX device.
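
(A hedged sketch of what probe could then look like, assuming
tegra_bpmp_get() resolves the new "nvidia,bpmp" phandle from the
device's of_node; error paths and table setup trimmed:)

static int tegra194_cpufreq_probe(struct platform_device *pdev)
{
	struct tegra_bpmp *bpmp;

	/* /cpus is now pdev->dev.of_node, so the generic lookup works
	 * and of_tegra_bpmp_get() is no longer needed
	 */
	bpmp = tegra_bpmp_get(&pdev->dev);
	if (IS_ERR(bpmp))
		return PTR_ERR(bpmp);

	/* ... build per-cluster tables, register the cpufreq driver ... */

	tegra_bpmp_put(bpmp);
	return 0;
}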

Rob, any thoughts on this from a device tree point of view? The /cpus
bindings don't mention the compatible property, but there doesn't seem
to be anything in the bindings that would prohibit its use.

If we can agree on that, I can forward my local changes to Sumit for
inclusion or reference.

Thierry

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-06  2:55       ` Viresh Kumar
@ 2020-04-07 18:18         ` sumitg
  2020-04-08  5:53           ` Viresh Kumar
  0 siblings, 1 reply; 40+ messages in thread
From: sumitg @ 2020-04-07 18:18 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen, sumitg



On 06/04/20 8:25 AM, Viresh Kumar wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 05-04-20, 00:08, sumitg wrote:
>>
>>
>> On 26/03/20 5:20 PM, Viresh Kumar wrote:
>>> On 03-12-19, 23:02, Sumit Gupta wrote:
>>>> diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
>>>> +enum cluster {
>>>> +     CLUSTER0,
>>>> +     CLUSTER1,
>>>> +     CLUSTER2,
>>>> +     CLUSTER3,
>>>
>>> All these have same CPUs ? Or big little kind of stuff ? How come they
>>> have different frequency tables ?
>>>
>> The T194 SoC has a homogeneous architecture where each cluster has two
>> symmetric Carmel cores; it is not big.LITTLE. LUTs are per cluster, and
>> all LUTs currently have the same values. Future SoCs may have different
>> LUT values per cluster.
> 
> LUT ?
>
LUT is a "Lookup Table" which provides the "hardware frequency request" as
a function of voltage. For each frequency, the table can have multiple
voltages based on temperature. The table is maintained in the "BPMP R5".
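
(Purely conceptual sketch of what one row of such a table might hold;
the real layout lives in the BPMP firmware and is not visible to the
kernel:)

struct bpmp_lut_entry {			/* hypothetical */
	u16 ndiv;			/* hardware frequency request */
	u16 millivolts[4];		/* one voltage per temperature range */
};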

>>>> +     MAX_CLUSTERS,
>>>> +};
> 
>>>> +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
>>>> +{
>>>> +     struct read_counters_work read_counters_work;
>>>> +     struct tegra_cpu_ctr c;
>>>> +     u32 delta_refcnt;
>>>> +     u32 delta_ccnt;
>>>> +     u32 rate_mhz;
>>>> +
>>>> +     read_counters_work.c.cpu = cpu;
>>>> +     read_counters_work.c.delay = delay;
>>>> +     INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
>>>> +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
>>>> +     flush_work(&read_counters_work.work);
>>>
>>> Why can't this be done in current context ?
>>>
>> We used a workqueue instead of smp_call_function_single() to allow a long delay.
> 
> Please explain completely, you have raised more questions than you
> answered :)
> 
> Why do you want to have long delays ?
> 
A long delay is used to make the observation window long enough to
correctly reconstruct the CPU frequency in the presence of noise.
In the next patch version, the delay is changed to 500 us, which our
tests show to be reliable.
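
(A worked example of the arithmetic with that window, using the formula
from the patch comment:)

/* delay = 500 us, ref clk = 408 MHz:
 *	delta_refcnt ~= 408 * 500  = 204000 counts
 * If the core actually ran at 1900 MHz during the window:
 *	delta_ccnt   ~= 1900 * 500 = 950000 counts
 *	rate_mhz = delta_ccnt * 408 / delta_refcnt
 *		 = 950000 * 408 / 204000 = 1900 MHz
 * With a 2 us window the same start/stop noise is a much larger
 * fraction of the counts, so the result is far less reliable.
 */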

>>>> +static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
>>>> +{
>>>> +     struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
>>>> +     int cluster = get_cpu_cluster(policy->cpu);
>>>> +
>>>> +     if (cluster >= data->num_clusters)
>>>> +             return -EINVAL;
>>>> +
>>>> +     policy->cur = tegra194_fast_get_speed(policy->cpu); /* boot freq */
>>>> +
>>>> +     /* set same policy for all cpus */
>>>> +     cpumask_copy(policy->cpus, cpu_possible_mask);
>>>
>>> You are copying cpu_possible_mask here, and so this routine will
>>> get called only once.
>>>
>>> I still don't understand the logic behind clusters and frequency
>>> tables.
>>>
>> Currently, we use the same policy for all CPUs to maximize throughput. A
>> separate patch to set the policy per cluster will be added later. But we
>> are not using that in T194 due to perf reasons.
> 
> You can't misrepresent the hardware this way, sorry.
> 
OK. Updated the policy to be per-cluster now.
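
(A minimal sketch of a per-cluster policy init, reusing get_cpu_cluster()
and the data structures from the patch; illustrative only:)

static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
{
	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
	enum cluster cl = get_cpu_cluster(policy->cpu);
	int cpu;

	if (cl >= data->num_clusters)
		return -EINVAL;

	/* group only the CPUs that share this cluster's NAFLL */
	for_each_possible_cpu(cpu)
		if (get_cpu_cluster(cpu) == cl)
			cpumask_set_cpu(cpu, policy->cpus);

	policy->freq_table = data->tables[cl];
	policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY;

	return 0;
}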

> Again few questions, I understand that you have multiple clusters
> here:
> 
> - All clusters will always have the frequency table ?
Yes, the frequency table is per cluster.

> - All clusters are capable of having a different frequency at any
>    point of time ? Or they will always run at same freq ?
>
Yes, all clusters are capable of running at different frequencies.

>>>> +     freqs.old = policy->cur;
>>>> +     freqs.new = tbl->frequency;
>>>> +
>>>> +     cpufreq_freq_transition_begin(policy, &freqs);
>>>> +     on_each_cpu_mask(policy->cpus, set_cpu_ndiv, tbl, true);
>>>
>>> When CPUs share clock line, why is this required for every CPU ?
>> The per-core NVFREQ_REQ system register is written to make the frequency
>> request for that core. Cluster h/w then forwards the max(core0, core1)
>> request to the cluster NAFLL.
> 
> You mean that each cluster can set frequency independently ?
> 
Yes.

> If all the clusters can run at independent frequencies but you still
> want them to run at same frequency, then that can be done with a
> single set of governor tunables, which is the default anyway. So this
> should just work for you.
> 
> I will not be reviewing the v2 version you sent for now as that is
> most likely wrong anyway.
> 
> --
> viresh
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-07 18:18         ` sumitg
@ 2020-04-08  5:53           ` Viresh Kumar
  2020-04-08 11:24             ` sumitg
  0 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2020-04-08  5:53 UTC (permalink / raw)
  To: sumitg
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On 07-04-20, 23:48, sumitg wrote:
> On 06/04/20 8:25 AM, Viresh Kumar wrote:
> > On 05-04-20, 00:08, sumitg wrote:
> > > On 26/03/20 5:20 PM, Viresh Kumar wrote:
> > > > On 03-12-19, 23:02, Sumit Gupta wrote:
> > > > > diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
> > > > > +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
> > > > > +{
> > > > > +     struct read_counters_work read_counters_work;
> > > > > +     struct tegra_cpu_ctr c;
> > > > > +     u32 delta_refcnt;
> > > > > +     u32 delta_ccnt;
> > > > > +     u32 rate_mhz;
> > > > > +
> > > > > +     read_counters_work.c.cpu = cpu;
> > > > > +     read_counters_work.c.delay = delay;
> > > > > +     INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
> > > > > +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
> > > > > +     flush_work(&read_counters_work.work);
> > > > 
> > > > Why can't this be done in current context ?
> > > > 
> > > We used a workqueue instead of smp_call_function_single() to allow a long delay.
> > 
> > Please explain completely, you have raised more questions than you
> > answered :)
> > 
> > Why do you want to have long delays ?
> > 
> A long delay is used to make the observation window long enough to
> correctly reconstruct the CPU frequency in the presence of noise.
> In the next patch version, the delay is changed to 500 us, which our
> tests show to be reliable.

I understand that you need to put a udelay() while reading the freq from
hardware, that is fine, but why do you need a workqueue for that? Why can't you
just read the values directly from the same context ?

-- 
viresh

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-08  5:53           ` Viresh Kumar
@ 2020-04-08 11:24             ` sumitg
  2020-04-09  7:44               ` Viresh Kumar
  0 siblings, 1 reply; 40+ messages in thread
From: sumitg @ 2020-04-08 11:24 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen, sumitg



On 08/04/20 11:23 AM, Viresh Kumar wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 07-04-20, 23:48, sumitg wrote:
>> On 06/04/20 8:25 AM, Viresh Kumar wrote:
>>> On 05-04-20, 00:08, sumitg wrote:
>>>> On 26/03/20 5:20 PM, Viresh Kumar wrote:
>>>>> On 03-12-19, 23:02, Sumit Gupta wrote:
>>>>>> diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
>>>>>> +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
>>>>>> +{
>>>>>> +     struct read_counters_work read_counters_work;
>>>>>> +     struct tegra_cpu_ctr c;
>>>>>> +     u32 delta_refcnt;
>>>>>> +     u32 delta_ccnt;
>>>>>> +     u32 rate_mhz;
>>>>>> +
>>>>>> +     read_counters_work.c.cpu = cpu;
>>>>>> +     read_counters_work.c.delay = delay;
>>>>>> +     INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
>>>>>> +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
>>>>>> +     flush_work(&read_counters_work.work);
>>>>>
>>>>> Why can't this be done in current context ?
>>>>>
>>>> We used a workqueue instead of smp_call_function_single() to allow a long delay.
>>>
>>> Please explain completely, you have raised more questions than you
>>> answered :)
>>>
>>> Why do you want to have long delays ?
>>>
>> A long delay is used to make the observation window long enough to
>> correctly reconstruct the CPU frequency in the presence of noise.
>> In the next patch version, the delay is changed to 500 us, which our
>> tests show to be reliable.
> 
> I understand that you need to put a udelay() while reading the freq from
> hardware, that is fine, but why do you need a workqueue for that? Why can't you
> just read the values directly from the same context ?
> 
The register to read the frequency is per-core and is not accessible from
other cores. So, we have to execute the function remotely, as the target
core whose frequency is to be read might be different from the current one.
The functions for that are smp_call_function_single() or queue_work_on().
We used queue_work_on() to avoid a long delay inside IPI interrupt context
with interrupts disabled.
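
(For contrast, a rough sketch of the rejected alternative;
smp_call_function_single() runs the callback in IPI context on the
target CPU, so the whole observation window would be a busy-wait with
interrupts disabled there:)

static void read_counters_ipi(void *info)
{
	struct tegra_cpu_ctr *c = info;
	u64 val;

	val = read_freq_feedback();
	c->last_refclk_cnt = lower_32_bits(val);
	c->last_coreclk_cnt = upper_32_bits(val);
	udelay(c->delay);		/* long busy-wait with IRQs off */
	val = read_freq_feedback();
	c->refclk_cnt = lower_32_bits(val);
	c->coreclk_cnt = upper_32_bits(val);
}

/* caller: smp_call_function_single(cpu, read_counters_ipi, &c, true); */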

> --
> viresh
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-08 11:24             ` sumitg
@ 2020-04-09  7:44               ` Viresh Kumar
  2020-04-09 11:21                 ` Sumit Gupta
  0 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2020-04-09  7:44 UTC (permalink / raw)
  To: sumitg
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On 08-04-20, 16:54, sumitg wrote:
> 
> 
> On 08/04/20 11:23 AM, Viresh Kumar wrote:
> > External email: Use caution opening links or attachments
> > 
> > 
> > On 07-04-20, 23:48, sumitg wrote:
> > > On 06/04/20 8:25 AM, Viresh Kumar wrote:
> > > > On 05-04-20, 00:08, sumitg wrote:
> > > > > On 26/03/20 5:20 PM, Viresh Kumar wrote:
> > > > > > On 03-12-19, 23:02, Sumit Gupta wrote:
> > > > > > > diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
> > > > > > > +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
> > > > > > > +{
> > > > > > > +     struct read_counters_work read_counters_work;
> > > > > > > +     struct tegra_cpu_ctr c;
> > > > > > > +     u32 delta_refcnt;
> > > > > > > +     u32 delta_ccnt;
> > > > > > > +     u32 rate_mhz;
> > > > > > > +
> > > > > > > +     read_counters_work.c.cpu = cpu;
> > > > > > > +     read_counters_work.c.delay = delay;
> > > > > > > +     INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);

Initialize the work only once, from the init routine.

> > > > > > > +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
> > > > > > > +     flush_work(&read_counters_work.work);
> > > > > > 
> > > > > > Why can't this be done in current context ?
> > > > > > 
> > > > > We used a workqueue instead of smp_call_function_single() to allow a long delay.
> > > > 
> > > > Please explain completely, you have raised more questions than you
> > > > answered :)
> > > > 
> > > > Why do you want to have long delays ?
> > > > 
> > > A long delay is used to make the observation window long enough to
> > > correctly reconstruct the CPU frequency in the presence of noise.
> > > In the next patch version, the delay is changed to 500 us, which our
> > > tests show to be reliable.
> > 
> > I understand that you need to put a udelay() while reading the freq from
> > hardware, that is fine, but why do you need a workqueue for that? Why can't you
> > just read the values directly from the same context ?
> > 
> The register to read the frequency is per-core and is not accessible from
> other cores. So, we have to execute the function remotely, as the target
> core whose frequency is to be read might be different from the current one.
> The functions for that are smp_call_function_single() or queue_work_on().
> We used queue_work_on() to avoid a long delay inside IPI interrupt context
> with interrupts disabled.

Okay, I understand this now, finally :)

But if the interrupts are disabled during some call, won't workqueues face the
same problem ?

-- 
viresh

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-09  7:44               ` Viresh Kumar
@ 2020-04-09 11:21                 ` Sumit Gupta
  2020-04-13  6:21                   ` Viresh Kumar
  0 siblings, 1 reply; 40+ messages in thread
From: Sumit Gupta @ 2020-04-09 11:21 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen, Sumit Gupta



On 09/04/20 1:14 PM, Viresh Kumar wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 08-04-20, 16:54, sumitg wrote:
>>
>>
>> On 08/04/20 11:23 AM, Viresh Kumar wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> On 07-04-20, 23:48, sumitg wrote:
>>>> On 06/04/20 8:25 AM, Viresh Kumar wrote:
>>>>> On 05-04-20, 00:08, sumitg wrote:
>>>>>> On 26/03/20 5:20 PM, Viresh Kumar wrote:
>>>>>>> On 03-12-19, 23:02, Sumit Gupta wrote:
>>>>>>>> diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
>>>>>>>> +static unsigned int tegra194_get_speed_common(u32 cpu, u32 delay)
>>>>>>>> +{
>>>>>>>> +     struct read_counters_work read_counters_work;
>>>>>>>> +     struct tegra_cpu_ctr c;
>>>>>>>> +     u32 delta_refcnt;
>>>>>>>> +     u32 delta_ccnt;
>>>>>>>> +     u32 rate_mhz;
>>>>>>>> +
>>>>>>>> +     read_counters_work.c.cpu = cpu;
>>>>>>>> +     read_counters_work.c.delay = delay;
>>>>>>>> +     INIT_WORK_ONSTACK(&read_counters_work.work, tegra_read_counters);
> 
> Initialize the work only once, from the init routine.
> 
We are using "read_counters_work" as a local variable, so every invocation
of the function will have its own copy of the counters for the
corresponding CPU. That's why we are doing INIT_WORK_ONSTACK() here.
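
(Sketch of the pattern being described: one on-stack work item per call
keeps each caller's counters private, which a single work item
initialized once from init could not do for concurrent readers; the
wrapper name is illustrative:)

static void read_counters_on(u32 cpu, u32 delay, struct tegra_cpu_ctr *c)
{
	/* private work item + counter set for this invocation only */
	struct read_counters_work rcw = {
		.c = { .cpu = cpu, .delay = delay },
	};

	INIT_WORK_ONSTACK(&rcw.work, tegra_read_counters);
	queue_work_on(cpu, read_counters_wq, &rcw.work);
	flush_work(&rcw.work);	/* stack frame outlives the work */
	destroy_work_on_stack(&rcw.work);

	*c = rcw.c;	/* caller computes the freq as in the patch */
}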

>>>>>>>> +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
>>>>>>>> +     flush_work(&read_counters_work.work);
>>>>>>>
>>>>>>> Why can't this be done in current context ?
>>>>>>>
>>>>>> We used a workqueue instead of smp_call_function_single() to allow a long delay.
>>>>>
>>>>> Please explain completely, you have raised more questions than you
>>>>> answered :)
>>>>>
>>>>> Why do you want to have long delays ?
>>>>>
>>>> Long delay value is used to have the observation window long enough for
>>>> correctly reconstructing the CPU frequency considering noise.
>>>> In next patch version, changed delay value to 500us which in our tests is
>>>> considered reliable.
>>>
>>> I understand that you need to put a udelay() while reading the freq from
>>> hardware, that is fine, but why do you need a workqueue for that? Why can't you
>>> just read the values directly from the same context ?
>>>
>> The register to read frequency is per core and not accessible to other
>> cores. So, we have to execute the function remotely as the target core to
>> read frequency might be different from current.
>> The functions for that are smp_call_function_single or queue_work_on.
>> We used queue_work_on() to avoid long delay inside ipi interrupt context
>> with interrupts disabled.
> 
> Okay, I understand this now, finally :)
> 
> But if the interrupts are disabled during some call, won't workqueues face the
> same problem?
> 
Yes, we are trying to minimize that case.
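
For reference, the two remote-execution options being weighed here, in
sketch form (tegra_read_counters_ipi is an assumed name for an IPI-safe
variant of the handler):

/* Option 1: IPI context. The handler, including its udelay(), runs
 * on the target CPU with interrupts disabled. */
smp_call_function_single(cpu, tegra_read_counters_ipi, &c, true);

/* Option 2: kworker context on the target CPU. The udelay() runs
 * with interrupts enabled, at the cost of queueing and flushing. */
queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
flush_work(&read_counters_work.work);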

> --
> viresh
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-09 11:21                 ` Sumit Gupta
@ 2020-04-13  6:21                   ` Viresh Kumar
  2020-04-13 12:20                     ` Sumit Gupta
  0 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2020-04-13  6:21 UTC (permalink / raw)
  To: Sumit Gupta
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On 09-04-20, 16:51, Sumit Gupta wrote:
> We are using "read_counters_work" as a local variable, so every
> invocation of the function will have its own copy of the counters for
> the corresponding CPU. That's why we are doing INIT_WORK_ONSTACK here.

Why? To support parallel calls reading the freq?

> > > > > > > > > +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
> > > > > > > > > +     flush_work(&read_counters_work.work);
> > > > > > > > 
> > > > > > > > Why can't this be done in current context ?
> > > > > > > > 
> > > > > > > We used work queue instead of smp_call_function_single() to have long delay.
> > > > > > 
> > > > > > Please explain completely, you have raised more questions than you
> > > > > > answered :)
> > > > > > 
> > > > > > Why do you want to have long delays ?
> > > > > > 
> > > > > Long delay value is used to have the observation window long enough for
> > > > > correctly reconstructing the CPU frequency considering noise.
> > > > > In next patch version, changed delay value to 500us which in our tests is
> > > > > considered reliable.
> > > > 
> > > > I understand that you need to put a udelay() while reading the freq from
> > > > hardware, that is fine, but why do you need a workqueue for that? Why can't you
> > > > just read the values directly from the same context ?
> > > > 
> > > The register to read frequency is per core and not accessible to other
> > > cores. So, we have to execute the function remotely as the target core to
> > > read frequency might be different from current.
> > > The functions for that are smp_call_function_single or queue_work_on.
> > > We used queue_work_on() to avoid long delay inside ipi interrupt context
> > > with interrupts disabled.
> > 
> > Okay, I understand this now, finally :)
> > 
> > But if the interrupts are disabled during some call, won't workqueues face the
> > same problem?
> > 
> Yes, we are trying to minimize that case.

But how do you know workqueues will perform better than
smp_call_function_single()? Just asking for clarity on this, as normally
everyone tries to use smp_call_function_single().

-- 
viresh

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-13  6:21                   ` Viresh Kumar
@ 2020-04-13 12:20                     ` Sumit Gupta
  2020-04-14  5:45                       ` Viresh Kumar
  0 siblings, 1 reply; 40+ messages in thread
From: Sumit Gupta @ 2020-04-13 12:20 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen



On 13/04/20 11:51 AM, Viresh Kumar wrote:
> 
> 
> On 09-04-20, 16:51, Sumit Gupta wrote:
>> We are using "read_counters_work" as a local variable, so every
>> invocation of the function will have its own copy of the counters for
>> the corresponding CPU. That's why we are doing INIT_WORK_ONSTACK here.
> 
> Why? To support parallel calls reading the freq?
> 
Yes.

>>>>>>>>>> +     queue_work_on(cpu, read_counters_wq, &read_counters_work.work);
>>>>>>>>>> +     flush_work(&read_counters_work.work);
>>>>>>>>>
>>>>>>>>> Why can't this be done in current context ?
>>>>>>>>>
>>>>>>>> We used work queue instead of smp_call_function_single() to have long delay.
>>>>>>>
>>>>>>> Please explain completely, you have raised more questions than you
>>>>>>> answered :)
>>>>>>>
>>>>>>> Why do you want to have long delays ?
>>>>>>>
>>>>>> Long delay value is used to have the observation window long enough for
>>>>>> correctly reconstructing the CPU frequency considering noise.
>>>>>> In next patch version, changed delay value to 500us which in our tests is
>>>>>> considered reliable.
>>>>>
>>>>> I understand that you need to put a udelay() while reading the freq from
>>>>> hardware, that is fine, but why do you need a workqueue for that? Why can't you
>>>>> just read the values directly from the same context ?
>>>>>
>>>> The register to read frequency is per core and not accessible to other
>>>> cores. So, we have to execute the function remotely as the target core to
>>>> read frequency might be different from current.
>>>> The functions for that are smp_call_function_single or queue_work_on.
>>>> We used queue_work_on() to avoid long delay inside ipi interrupt context
>>>> with interrupts disabled.
>>>
>>> Okay, I understand this now, finally :)
>>>
>>> But if the interrupts are disabled during some call, won't workqueues face the
>>> same problem?
>>>
>> Yes, we are trying to minimize that case.
> 
> But how do you know workqueues will perform better than
> smp_call_function_single()? Just asking for clarity on this, as normally
> everyone tries to use smp_call_function_single().
> 
This was done considering the long delay value, as explained previously.
Do you think smp_call_function_single() would be better than a 
workqueue here?

> --
> viresh
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-13 12:20                     ` Sumit Gupta
@ 2020-04-14  5:45                       ` Viresh Kumar
  2020-04-15 11:25                         ` Sumit Gupta
  0 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2020-04-14  5:45 UTC (permalink / raw)
  To: Sumit Gupta
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On 13-04-20, 17:50, Sumit Gupta wrote:
> This was done considering the long delay value, as explained previously.
> Do you think smp_call_function_single() would be better than a
> workqueue here?

Don't work with assumptions; you should test both and see which one
works better. To my understanding, a workqueue should never be faster
than smp_call_function_single().
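
One quick way to compare the two, as suggested (a ktime-based timing
sketch; tegra194_get_speed_common() is the function from the patch):

ktime_t start = ktime_get();

tegra194_get_speed_common(cpu, 500);	/* 500 us observation window */
pr_info("freq read on cpu%u took %lld ns\n", cpu,
	ktime_to_ns(ktime_sub(ktime_get(), start)));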

-- 
viresh

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-14  5:45                       ` Viresh Kumar
@ 2020-04-15 11:25                         ` Sumit Gupta
  2020-04-16  3:37                           ` Viresh Kumar
  0 siblings, 1 reply; 40+ messages in thread
From: Sumit Gupta @ 2020-04-15 11:25 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen



On 14/04/20 11:15 AM, Viresh Kumar wrote:
> 
> 
> On 13-04-20, 17:50, Sumit Gupta wrote:
>> This was done considering the long delay value, as explained previously.
>> Do you think smp_call_function_single() would be better than a
>> workqueue here?
> 
> Don't work with assumptions; you should test both and see which one
> works better. To my understanding, a workqueue should never be faster
> than smp_call_function_single().
We checked the time taken, and it is almost the same in both cases.
Earlier we used smp_call_function_single(), but the delay period was 
small on that SoC. On T194 the period is longer, so this is an 
optimization: using a workqueue has the advantage that interrupts are 
not disabled for the whole period.
If you think the workqueue is not required, we can remove it. The 
functionality works fine either way.
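
For context, the reconstruction that the observation window feeds into
looks roughly like this (sketch; REF_CLK_MHZ is an assumed name for the
fixed reference clock rate):

/* sample (refclk_cnt, coreclk_cnt) ... */
udelay(c->delay);			/* observation window, e.g. 500 us */
/* sample the counters again ... */

delta_refcnt = c->refclk_cnt - c->last_refclk_cnt;
delta_ccnt = c->coreclk_cnt - c->last_coreclk_cnt;
rate_mhz = delta_ccnt * REF_CLK_MHZ / delta_refcnt;

A longer window makes the deltas larger, so sampling noise contributes
proportionally less to rate_mhz.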

> 
> --
> viresh
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-15 11:25                         ` Sumit Gupta
@ 2020-04-16  3:37                           ` Viresh Kumar
  2020-04-16  7:06                             ` Sumit Gupta
  0 siblings, 1 reply; 40+ messages in thread
From: Viresh Kumar @ 2020-04-16  3:37 UTC (permalink / raw)
  To: Sumit Gupta
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen

On 15-04-20, 16:55, Sumit Gupta wrote:
> 
> 
> On 14/04/20 11:15 AM, Viresh Kumar wrote:
> > 
> > 
> > On 13-04-20, 17:50, Sumit Gupta wrote:
> > > This was done considering the long delay value, as explained previously.
> > > Do you think smp_call_function_single() would be better than a
> > > workqueue here?
> > 
> > Don't work with assumptions; you should test both and see which one
> > works better. To my understanding, a workqueue should never be faster
> > than smp_call_function_single().
> We checked the time taken, and it is almost the same in both cases.
> Earlier we used smp_call_function_single(), but the delay period was
> small on that SoC. On T194 the period is longer, so this is an
> optimization: using a workqueue has the advantage that interrupts are
> not disabled for the whole period.

Hmm, okay, keep the workqueue and mention the required details in a
comment for everyone to understand why the implementation is done that
way.

-- 
viresh

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver
  2020-04-16  3:37                           ` Viresh Kumar
@ 2020-04-16  7:06                             ` Sumit Gupta
  0 siblings, 0 replies; 40+ messages in thread
From: Sumit Gupta @ 2020-04-16  7:06 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: rjw, catalin.marinas, will, thierry.reding, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen, Sumit Gupta



On 16/04/20 9:07 AM, Viresh Kumar wrote:
> 
> 
> On 15-04-20, 16:55, Sumit Gupta wrote:
>>
>>
>> On 14/04/20 11:15 AM, Viresh Kumar wrote:
>>>
>>>
>>> On 13-04-20, 17:50, Sumit Gupta wrote:
>>>> This was done considering the long delay value, as explained previously.
>>>> Do you think smp_call_function_single() would be better than a
>>>> workqueue here?
>>>
>>> Don't work with assumptions; you should test both and see which one
>>> works better. To my understanding, a workqueue should never be faster
>>> than smp_call_function_single().
>> We checked the time taken, and it is almost the same in both cases.
>> Earlier we used smp_call_function_single(), but the delay period was
>> small on that SoC. On T194 the period is longer, so this is an
>> optimization: using a workqueue has the advantage that interrupts are
>> not disabled for the whole period.
> 
> Hmm, okay, keep the workqueue and mention the required details in a
> comment for everyone to understand why the implementation is done that
> way.
> 

Sure, thank you!
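
The comment that eventually lands in the driver could read something
like this (wording assumed, not quoted from a later revision):

/*
 * Reads of the per-core counter registers must run on the target
 * CPU. queue_work_on() is used instead of smp_call_function_single()
 * so that the ~500 us observation window does not execute in IPI
 * context with interrupts disabled on that CPU.
 */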

> --
> viresh
> 

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2020-04-07 10:05           ` Thierry Reding
@ 2020-04-27  7:18             ` Thierry Reding
  2020-04-29  8:21               ` Sumit Gupta
  2020-05-06 16:58               ` Thierry Reding
  2020-05-20 14:43             ` Rob Herring
  1 sibling, 2 replies; 40+ messages in thread
From: Thierry Reding @ 2020-04-27  7:18 UTC (permalink / raw)
  To: Rob Herring, Viresh Kumar, Rafael J. Wysocki
  Cc: Mikko Perttunen, Sumit Gupta, catalin.marinas, will, jonathanh,
	talho, linux-pm, linux-tegra, linux-arm-kernel, linux-kernel,
	bbasu, mperttunen, devicetree

[-- Attachment #1: Type: text/plain, Size: 3934 bytes --]

On Tue, Apr 07, 2020 at 12:05:20PM +0200, Thierry Reding wrote:
> On Wed, Dec 04, 2019 at 03:21:38PM +0530, Viresh Kumar wrote:
> > On 04-12-19, 10:33, Thierry Reding wrote:
> > > Yeah, the code that registers this device is in drivers/base/cpu.c in
> > > register_cpu(). It even retrieves the device tree node for the CPU from
> > > device tree and stores it in cpu->dev.of_node, so we should be able to
> > > just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
> > > to the BPMP.
> > > 
> > > That said, I'm wondering if perhaps we could just add a compatible
> > > string to the /cpus node for cases like this where we don't have an
> > > actual device representing the CPU complex. There are a number of CPU
> > > frequency drivers that register dummy devices just so that they have
> > > something to bind a driver to.
> > > 
> > > If we allow the /cpus node to represent the CPU complex (if no other
> > > "device" does that yet), we can add a compatible string and have the
> > > cpufreq driver match on that.
> > > 
> > > Of course this would be slightly difficult to retrofit into existing
> > > drivers because they'd need to remain backwards compatible with existing
> > > device trees. But it would allow future drivers to do this a little more
> > > elegantly. For some SoCs this may not matter, but especially once you
> > > start depending on additional resources this would come in handy.
> > > 
> > > Adding Rob and the device tree mailing list for feedback on this idea.
> > 
> > Took some time to find this thread, but something around this was
> > suggested by Rafael earlier.
> > 
> > https://lore.kernel.org/lkml/8139001.Q4eV8YG1Il@vostro.rjw.lan/
> 
> I gave this a try and came up with the following:
> 
> --- >8 ---
> diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> index f4ede86e32b4..e4462f95f0b3 100644
> --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> @@ -1764,6 +1764,9 @@ bpmp_thermal: thermal {
>  	};
>  
>  	cpus {
> +		compatible = "nvidia,tegra194-ccplex";
> +		nvidia,bpmp = <&bpmp>;
> +
>  		#address-cells = <1>;
>  		#size-cells = <0>;
>  
> --- >8 ---
> 
> Now I can do something roughly like this, although I have a more complete
> patch locally that also gets rid of all the global variables because we
> now actually have a struct platform_device that we can anchor everything
> at:
> 
> --- >8 ---
> static const struct of_device_id tegra194_cpufreq_of_match[] = {
> 	{ .compatible = "nvidia,tegra194-ccplex", },
> 	{ /* sentinel */ }
> };
> MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match);
> 
> static struct platform_driver tegra194_ccplex_driver = {
> 	.driver = {
> 		.name = "tegra194-cpufreq",
> 		.of_match_table = tegra194_cpufreq_of_match,
> 	},
> 	.probe = tegra194_cpufreq_probe,
> 	.remove = tegra194_cpufreq_remove,
> };
> module_platform_driver(tegra194_ccplex_driver);
> --- >8 ---
> 
> I don't think that's exactly what Rafael (Cc'ed) had in mind, since the
> above thread seems to have mostly talked about binding a driver to each
> individual CPU.
> 
> But this seems a lot better than having to instantiate a device from
> scratch just so that a driver can bind to it and it allows additional
> properties to be associated with the CCPLEX device.
> 
> Rob, any thoughts on this from a device tree point of view? The /cpus
> bindings don't mention the compatible property, but there doesn't seem
> to be anything in the bindings that would prohibit its use.
> 
> If we can agree on that, I can forward my local changes to Sumit for
> inclusion or reference.

Rob, do you see any reason why we shouldn't be able to use a compatible
string in the /cpus node for devices such as Tegra194 where there is no
dedicated hardware block for the CCPLEX?

Thierry

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2020-04-27  7:18             ` Thierry Reding
@ 2020-04-29  8:21               ` Sumit Gupta
  2020-05-06 16:58               ` Thierry Reding
  1 sibling, 0 replies; 40+ messages in thread
From: Sumit Gupta @ 2020-04-29  8:21 UTC (permalink / raw)
  To: Thierry Reding, Rob Herring, Viresh Kumar, Rafael J. Wysocki
  Cc: Mikko Perttunen, catalin.marinas, will, jonathanh, talho,
	linux-pm, linux-tegra, linux-arm-kernel, linux-kernel, bbasu,
	mperttunen, devicetree, Sumit Gupta



On 27/04/20 12:48 PM, Thierry Reding wrote:
> On Tue, Apr 07, 2020 at 12:05:20PM +0200, Thierry Reding wrote:
>> On Wed, Dec 04, 2019 at 03:21:38PM +0530, Viresh Kumar wrote:
>>> On 04-12-19, 10:33, Thierry Reding wrote:
>>>> Yeah, the code that registers this device is in drivers/base/cpu.c in
>>>> register_cpu(). It even retrieves the device tree node for the CPU from
>>>> device tree and stores it in cpu->dev.of_node, so we should be able to
>>>> just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
>>>> to the BPMP.
>>>>
>>>> That said, I'm wondering if perhaps we could just add a compatible
>>>> string to the /cpus node for cases like this where we don't have an
>>>> actual device representing the CPU complex. There are a number of CPU
>>>> frequency drivers that register dummy devices just so that they have
>>>> something to bind a driver to.
>>>>
>>>> If we allow the /cpus node to represent the CPU complex (if no other
>>>> "device" does that yet), we can add a compatible string and have the
>>>> cpufreq driver match on that.
>>>>
>>>> Of course this would be slightly difficult to retrofit into existing
>>>> drivers because they'd need to remain backwards compatible with existing
>>>> device trees. But it would allow future drivers to do this a little more
>>>> elegantly. For some SoCs this may not matter, but especially once you
>>>> start depending on additional resources this would come in handy.
>>>>
>>>> Adding Rob and the device tree mailing list for feedback on this idea.
>>>
>>> Took some time to find this thread, but something around this was
>>> suggested by Rafael earlier.
>>>
>>> https://lore.kernel.org/lkml/8139001.Q4eV8YG1Il@vostro.rjw.lan/
>>
>> I gave this a try and came up with the following:
>>
>> --- >8 ---
>> diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
>> index f4ede86e32b4..e4462f95f0b3 100644
>> --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
>> +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
>> @@ -1764,6 +1764,9 @@ bpmp_thermal: thermal {
>>   	};
>>   
>>   	cpus {
>> +		compatible = "nvidia,tegra194-ccplex";
>> +		nvidia,bpmp = <&bpmp>;
>> +
>>   		#address-cells = <1>;
>>   		#size-cells = <0>;
>>   
>> --- >8 ---
>>
>> Now I can do something roughly like this, although I have a more complete
>> patch locally that also gets rid of all the global variables because we
>> now actually have a struct platform_device that we can anchor everything
>> at:
>>
>> --- >8 ---
>> static const struct of_device_id tegra194_cpufreq_of_match[] = {
>> 	{ .compatible = "nvidia,tegra194-ccplex", },
>> 	{ /* sentinel */ }
>> };
>> MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match);
>>
>> static struct platform_driver tegra194_ccplex_driver = {
>> 	.driver = {
>> 		.name = "tegra194-cpufreq",
>> 		.of_match_table = tegra194_cpufreq_of_match,
>> 	},
>> 	.probe = tegra194_cpufreq_probe,
>> 	.remove = tegra194_cpufreq_remove,
>> };
>> module_platform_driver(tegra194_ccplex_driver);
>> --- >8 ---
>>
>> I don't think that's exactly what Rafael (Cc'ed) had in mind, since the
>> above thread seems to have mostly talked about binding a driver to each
>> individual CPU.
>>
>> But this seems a lot better than having to instantiate a device from
>> scratch just so that a driver can bind to it and it allows additional
>> properties to be associated with the CCPLEX device.
>>
>> Rob, any thoughts on this from a device tree point of view? The /cpus
>> bindings don't mention the compatible property, but there doesn't seem
>> to be anything in the bindings that would prohibit its use.
>>
>> If we can agree on that, I can forward my local changes to Sumit for
>> inclusion or reference.
> 
> Rob, do you see any reason why we shouldn't be able to use a compatible
> string in the /cpus node for devices such as Tegra194 where there is no
> dedicated hardware block for the CCPLEX?
> 
> Thierry
> 

Ping.
Please suggest whether we can add a compatible string in the '/cpus' node.
If not, then I propose accepting the current patch to get the BPMP data 
without adding any property in the respective driver's DT node.
We can push a separate patch later if we need to add a compatible string 
in the '/cpus' node or create a new DT node for cpufreq.

Regards,
Sumit

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2020-04-27  7:18             ` Thierry Reding
  2020-04-29  8:21               ` Sumit Gupta
@ 2020-05-06 16:58               ` Thierry Reding
  1 sibling, 0 replies; 40+ messages in thread
From: Thierry Reding @ 2020-05-06 16:58 UTC (permalink / raw)
  To: Rob Herring, Viresh Kumar, Rafael J. Wysocki
  Cc: Mikko Perttunen, Sumit Gupta, catalin.marinas, will, jonathanh,
	talho, linux-pm, linux-tegra, linux-arm-kernel, linux-kernel,
	bbasu, mperttunen, devicetree

[-- Attachment #1: Type: text/plain, Size: 4379 bytes --]

On Mon, Apr 27, 2020 at 09:18:00AM +0200, Thierry Reding wrote:
> On Tue, Apr 07, 2020 at 12:05:20PM +0200, Thierry Reding wrote:
> > On Wed, Dec 04, 2019 at 03:21:38PM +0530, Viresh Kumar wrote:
> > > On 04-12-19, 10:33, Thierry Reding wrote:
> > > > Yeah, the code that registers this device is in drivers/base/cpu.c in
> > > > register_cpu(). It even retrieves the device tree node for the CPU from
> > > > device tree and stores it in cpu->dev.of_node, so we should be able to
> > > > just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
> > > > to the BPMP.
> > > > 
> > > > That said, I'm wondering if perhaps we could just add a compatible
> > > > string to the /cpus node for cases like this where we don't have an
> > > > actual device representing the CPU complex. There are a number of CPU
> > > > frequency drivers that register dummy devices just so that they have
> > > > something to bind a driver to.
> > > > 
> > > > If we allow the /cpus node to represent the CPU complex (if no other
> > > > "device" does that yet), we can add a compatible string and have the
> > > > cpufreq driver match on that.
> > > > 
> > > > Of course this would be slightly difficult to retrofit into existing
> > > > drivers because they'd need to remain backwards compatible with existing
> > > > device trees. But it would allow future drivers to do this a little more
> > > > elegantly. For some SoCs this may not matter, but especially once you
> > > > start depending on additional resources this would come in handy.
> > > > 
> > > > Adding Rob and the device tree mailing list for feedback on this idea.
> > > 
> > > Took some time to find this thread, but something around this was
> > > suggested by Rafael earlier.
> > > 
> > > https://lore.kernel.org/lkml/8139001.Q4eV8YG1Il@vostro.rjw.lan/
> > 
> > I gave this a try and came up with the following:
> > 
> > --- >8 ---
> > diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > index f4ede86e32b4..e4462f95f0b3 100644
> > --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > @@ -1764,6 +1764,9 @@ bpmp_thermal: thermal {
> >  	};
> >  
> >  	cpus {
> > +		compatible = "nvidia,tegra194-ccplex";
> > +		nvidia,bpmp = <&bpmp>;
> > +
> >  		#address-cells = <1>;
> >  		#size-cells = <0>;
> >  
> > --- >8 ---
> > 
> > Now I can do something roughly like this, although I have a more complete
> > patch locally that also gets rid of all the global variables because we
> > now actually have a struct platform_device that we can anchor everything
> > at:
> > 
> > --- >8 ---
> > static const struct of_device_id tegra194_cpufreq_of_match[] = {
> > 	{ .compatible = "nvidia,tegra194-ccplex", },
> > 	{ /* sentinel */ }
> > };
> > MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match);
> > 
> > static struct platform_driver tegra194_ccplex_driver = {
> > 	.driver = {
> > 		.name = "tegra194-cpufreq",
> > 		.of_match_table = tegra194_cpufreq_of_match,
> > 	},
> > 	.probe = tegra194_cpufreq_probe,
> > 	.remove = tegra194_cpufreq_remove,
> > };
> > module_platform_driver(tegra194_ccplex_driver);
> > --- >8 ---
> > 
> > I don't think that's exactly what Rafael (Cc'ed) had in mind, since the
> > above thread seems to have mostly talked about binding a driver to each
> > individual CPU.
> > 
> > But this seems a lot better than having to instantiate a device from
> > scratch just so that a driver can bind to it and it allows additional
> > properties to be associated with the CCPLEX device.
> > 
> > Rob, any thoughts on this from a device tree point of view? The /cpus
> > bindings don't mention the compatible property, but there doesn't seem
> > to be anything in the bindings that would prohibit its use.
> > 
> > If we can agree on that, I can forward my local changes to Sumit for
> > inclusion or reference.
> 
> Rob, do you see any reason why we shouldn't be able to use a compatible
> string in the /cpus node for devices such as Tegra194 where there is no
> dedicated hardware block for the CCPLEX?

Rob, can you take a brief look and provide some feedback on this? I'd
like to get this merged for v5.8 and where to instantiate this is the
only thing holding this up at this point.

Thanks,
Thierry

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2020-04-07 10:05           ` Thierry Reding
  2020-04-27  7:18             ` Thierry Reding
@ 2020-05-20 14:43             ` Rob Herring
  2020-05-20 15:38               ` Thierry Reding
  1 sibling, 1 reply; 40+ messages in thread
From: Rob Herring @ 2020-05-20 14:43 UTC (permalink / raw)
  To: Thierry Reding
  Cc: Viresh Kumar, Rafael J. Wysocki, Mikko Perttunen, Sumit Gupta,
	Catalin Marinas, Will Deacon, Jon Hunter, Timo Alho,
	open list:THERMAL, linux-tegra,
	moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE,
	linux-kernel, bbasu, Mikko Perttunen, devicetree

On Tue, Apr 7, 2020 at 4:05 AM Thierry Reding <thierry.reding@gmail.com> wrote:
>
> On Wed, Dec 04, 2019 at 03:21:38PM +0530, Viresh Kumar wrote:
> > On 04-12-19, 10:33, Thierry Reding wrote:
> > > Yeah, the code that registers this device is in drivers/base/cpu.c in
> > > register_cpu(). It even retrieves the device tree node for the CPU from
> > > device tree and stores it in cpu->dev.of_node, so we should be able to
> > > just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
> > > to the BPMP.
> > >
> > > That said, I'm wondering if perhaps we could just add a compatible
> > > string to the /cpus node for cases like this where we don't have an
> > > actual device representing the CPU complex. There are a number of CPU
> > > frequency drivers that register dummy devices just so that they have
> > > something to bind a driver to.
> > >
> > > If we allow the /cpus node to represent the CPU complex (if no other
> > > "device" does that yet), we can add a compatible string and have the
> > > cpufreq driver match on that.
> > >
> > > Of course this would be slightly difficult to retrofit into existing
> > > drivers because they'd need to remain backwards compatible with existing
> > > device trees. But it would allow future drivers to do this a little more
> > > elegantly. For some SoCs this may not matter, but especially once you
> > > start depending on additional resources this would come in handy.
> > >
> > > Adding Rob and the device tree mailing list for feedback on this idea.
> >
> > Took some time to find this thread, but something around this was
> > suggested by Rafael earlier.
> >
> > https://lore.kernel.org/lkml/8139001.Q4eV8YG1Il@vostro.rjw.lan/
>
> I gave this a try and came up with the following:
>
> --- >8 ---
> diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> index f4ede86e32b4..e4462f95f0b3 100644
> --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> @@ -1764,6 +1764,9 @@ bpmp_thermal: thermal {
>         };
>
>         cpus {
> +               compatible = "nvidia,tegra194-ccplex";
> +               nvidia,bpmp = <&bpmp>;

Is there more than one BPMP? If not, you don't need this. Just look up
the node by compatible.


> +
>                 #address-cells = <1>;
>                 #size-cells = <0>;
>
> --- >8 ---
>
> Now I can do something roughly like this, although I have a more complete
> patch locally that also gets rid of all the global variables because we
> now actually have a struct platform_device that we can anchor everything
> at:
>
> --- >8 ---
> static const struct of_device_id tegra194_cpufreq_of_match[] = {
>         { .compatible = "nvidia,tegra194-ccplex", },
>         { /* sentinel */ }
> };
> MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match);
>
> static struct platform_driver tegra194_ccplex_driver = {
>         .driver = {
>                 .name = "tegra194-cpufreq",
>                 .of_match_table = tegra194_cpufreq_of_match,
>         },
>         .probe = tegra194_cpufreq_probe,
>         .remove = tegra194_cpufreq_remove,
> };
> module_platform_driver(tegra194_ccplex_driver);
> --- >8 ---
>
> I don't think that's exactly what Rafael (Cc'ed) had in mind, since the
> above thread seems to have mostly talked about binding a driver to each
> individual CPU.
>
> But this seems a lot better than having to instantiate a device from
> scratch just so that a driver can bind to it and it allows additional
> properties to be associated with the CCPLEX device.

What additional properties? A continual stream of properties added 1
by 1 would negatively affect my opinion of this.

> Rob, any thoughts on this from a device tree point of view? The /cpus
> bindings don't mention the compatible property, but there doesn't seem
> to be anything in the bindings that would prohibit its use.

What happens when you have more than one CPU-related driver in
addition to cpufreq? You may still end up creating child platform
devices, and then you've gained very little.

You could solve this without DT changes. You can bind on node names.
The driver probe can then check the parent compatible and return if
not matching. I'm not sure if you could get module auto loading to
work in that case. It would have to be based on the root compatible
(rather than the driver match table) and be able to load multiple
matching modules.
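
A sketch of that fallback (of_machine_is_compatible() tests the root
node's compatible list):

static int tegra194_cpufreq_probe(struct platform_device *pdev)
{
	/* Bound by node name ("cpus"); bail out on other machines. */
	if (!of_machine_is_compatible("nvidia,tegra194"))
		return -ENODEV;

	/* ... */
	return 0;
}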

Rob

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2020-05-20 14:43             ` Rob Herring
@ 2020-05-20 15:38               ` Thierry Reding
  2020-05-20 16:21                 ` Rob Herring
  0 siblings, 1 reply; 40+ messages in thread
From: Thierry Reding @ 2020-05-20 15:38 UTC (permalink / raw)
  To: Rob Herring
  Cc: Viresh Kumar, Rafael J. Wysocki, Mikko Perttunen, Sumit Gupta,
	Catalin Marinas, Will Deacon, Jon Hunter, Timo Alho,
	open list:THERMAL, linux-tegra,
	moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE,
	linux-kernel, bbasu, Mikko Perttunen, devicetree

[-- Attachment #1: Type: text/plain, Size: 7097 bytes --]

On Wed, May 20, 2020 at 08:43:03AM -0600, Rob Herring wrote:
> On Tue, Apr 7, 2020 at 4:05 AM Thierry Reding <thierry.reding@gmail.com> wrote:
> >
> > On Wed, Dec 04, 2019 at 03:21:38PM +0530, Viresh Kumar wrote:
> > > On 04-12-19, 10:33, Thierry Reding wrote:
> > > > Yeah, the code that registers this device is in drivers/base/cpu.c in
> > > > register_cpu(). It even retrieves the device tree node for the CPU from
> > > > device tree and stores it in cpu->dev.of_node, so we should be able to
> > > > just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
> > > > to the BPMP.
> > > >
> > > > That said, I'm wondering if perhaps we could just add a compatible
> > > > string to the /cpus node for cases like this where we don't have an
> > > > actual device representing the CPU complex. There are a number of CPU
> > > > frequency drivers that register dummy devices just so that they have
> > > > something to bind a driver to.
> > > >
> > > > If we allow the /cpus node to represent the CPU complex (if no other
> > > > "device" does that yet), we can add a compatible string and have the
> > > > cpufreq driver match on that.
> > > >
> > > > Of course this would be slightly difficult to retrofit into existing
> > > > drivers because they'd need to remain backwards compatible with existing
> > > > device trees. But it would allow future drivers to do this a little more
> > > > elegantly. For some SoCs this may not matter, but especially once you
> > > > start depending on additional resources this would come in handy.
> > > >
> > > > Adding Rob and the device tree mailing list for feedback on this idea.
> > >
> > > Took some time to find this thread, but something around this was
> > > suggested by Rafael earlier.
> > >
> > > https://lore.kernel.org/lkml/8139001.Q4eV8YG1Il@vostro.rjw.lan/
> >
> > I gave this a try and came up with the following:
> >
> > --- >8 ---
> > diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > index f4ede86e32b4..e4462f95f0b3 100644
> > --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > @@ -1764,6 +1764,9 @@ bpmp_thermal: thermal {
> >         };
> >
> >         cpus {
> > +               compatible = "nvidia,tegra194-ccplex";
> > +               nvidia,bpmp = <&bpmp>;
> 
> Is there more than one BPMP? If not, you don't need this. Just look up
> the node by compatible.

There are no SoCs currently that need to differentiate between multiple
BPMPs, so yes, it would be possible to look up the node by compatible.
But we also used to assume that PCs would only ever come with a single
GPU or audio card, and that has always caused a lot of cleanup work
when it turned out to no longer be true.

Also, we already have a couple of devices referencing the BPMP by
phandle like this, so having this in a CCPLEX node would keep things
consistent.

One of the reasons why we initially did it this way was also so that we
could make the dependencies explicit within device tree. If we look up
by compatible string, then the driver is the only one with the knowledge
about where to get at it. If we have the explicit reference we at least
have a chance of determining the dependency by just looking at the
device tree.
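
Concretely, with the phandle in place the probe can resolve the BPMP
through the existing helper, since tegra_bpmp_get() parses the
"nvidia,bpmp" property of the given device's node (sketch):

static int tegra194_cpufreq_probe(struct platform_device *pdev)
{
	struct tegra_bpmp *bpmp;

	bpmp = tegra_bpmp_get(&pdev->dev);
	if (IS_ERR(bpmp))
		return PTR_ERR(bpmp);

	/* ... query frequency tables via BPMP, register cpufreq ... */
	return 0;
}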

> > +
> >                 #address-cells = <1>;
> >                 #size-cells = <0>;
> >
> > --- >8 ---
> >
> > Now I can do something roughly like this, although I have a more complete
> > patch locally that also gets rid of all the global variables because we
> > now actually have a struct platform_device that we can anchor everything
> > at:
> >
> > --- >8 ---
> > static const struct of_device_id tegra194_cpufreq_of_match[] = {
> >         { .compatible = "nvidia,tegra194-ccplex", },
> >         { /* sentinel */ }
> > };
> > MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match);
> >
> > static struct platform_driver tegra194_ccplex_driver = {
> >         .driver = {
> >                 .name = "tegra194-cpufreq",
> >                 .of_match_table = tegra194_cpufreq_of_match,
> >         },
> >         .probe = tegra194_cpufreq_probe,
> >         .remove = tegra194_cpufreq_remove,
> > };
> > module_platform_driver(tegra194_ccplex_driver);
> > --- >8 ---
> >
> > I don't think that's exactly what Rafael (Cc'ed) had in mind, since the
> > above thread seems to have mostly talked about binding a driver to each
> > individual CPU.
> >
> > But this seems a lot better than having to instantiate a device from
> > scratch just so that a driver can bind to it and it allows additional
> > properties to be associated with the CCPLEX device.
> 
> What additional properties? A continual stream of properties added 1
> by 1 would negatively affect my opinion of this.

I don't expect there would be many. I think there's an earlier
generation of Tegra that requires a regulator and I can imagine that's
pretty common. But other than that I would expect this to be a fairly
narrow set of properties.

> > Rob, any thoughts on this from a device tree point of view? The /cpus
> > bindings don't mention the compatible property, but there doesn't seem
> > to be anything in the bindings that would prohibit its use.
> 
> What happens when you have more than one CPU-related driver in
> addition to cpufreq? You may still end up creating child platform
> devices, and then you've gained very little.

That's only if you absolutely want to stick with the "one driver per
subsystem" model. I personally think that's completely obsolete these
days. If you have a CPU complex device that can do both CPU frequency
scaling and put the CPU into idle states, for example, then there is
really no reason to artificially split that into two separate drivers
just to match the subsystems that we have.

Most subsystems that I've come across work just fine if a single driver
registers with multiple subsystems.

I also know that some people like it better when things are nicely split
up into multiple drivers. But I really don't see how that simplifies
things. In fact in my opinion that makes things only more complicated
because you have additional boilerplate and then you need to be extra
careful about how these different drivers are ordered, and you need to
take extra precautions when sharing things like clocks and register
regions.

> You could solve this without DT changes. You can bind on node names.
> The driver probe can then check the parent compatible and return if
> not matching. I'm not sure if you could get module auto loading to
> work in that case. It would have to be based on the root compatible
> (rather than the driver match table) and be able to load multiple
> matching modules.

That sounds like it would get very complicated for something this
simple. Having a compatible string in /cpus seemed like the most logical
option because it would basically just work out of the box and the same
way we're used to from other devices.

Thierry

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data
  2020-05-20 15:38               ` Thierry Reding
@ 2020-05-20 16:21                 ` Rob Herring
  0 siblings, 0 replies; 40+ messages in thread
From: Rob Herring @ 2020-05-20 16:21 UTC (permalink / raw)
  To: Thierry Reding
  Cc: Viresh Kumar, Rafael J. Wysocki, Mikko Perttunen, Sumit Gupta,
	Catalin Marinas, Will Deacon, Jon Hunter, Timo Alho,
	open list:THERMAL, linux-tegra,
	moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE,
	linux-kernel, bbasu, Mikko Perttunen, devicetree

On Wed, May 20, 2020 at 9:39 AM Thierry Reding <thierry.reding@gmail.com> wrote:
>
> On Wed, May 20, 2020 at 08:43:03AM -0600, Rob Herring wrote:
> > On Tue, Apr 7, 2020 at 4:05 AM Thierry Reding <thierry.reding@gmail.com> wrote:
> > >
> > > On Wed, Dec 04, 2019 at 03:21:38PM +0530, Viresh Kumar wrote:
> > > > On 04-12-19, 10:33, Thierry Reding wrote:
> > > > > Yeah, the code that registers this device is in drivers/base/cpu.c in
> > > > > register_cpu(). It even retrieves the device tree node for the CPU from
> > > > > device tree and stores it in cpu->dev.of_node, so we should be able to
> > > > > just pass &cpu->dev to tegra_bpmp_get() in order to retrieve a reference
> > > > > to the BPMP.
> > > > >
> > > > > That said, I'm wondering if perhaps we could just add a compatible
> > > > > string to the /cpus node for cases like this where we don't have an
> > > > > actual device representing the CPU complex. There are a number of CPU
> > > > > frequency drivers that register dummy devices just so that they have
> > > > > something to bind a driver to.
> > > > >
> > > > > If we allow the /cpus node to represent the CPU complex (if no other
> > > > > "device" does that yet), we can add a compatible string and have the
> > > > > cpufreq driver match on that.
> > > > >
> > > > > Of course this would be slightly difficult to retrofit into existing
> > > > > drivers because they'd need to remain backwards compatible with existing
> > > > > device trees. But it would allow future drivers to do this a little more
> > > > > elegantly. For some SoCs this may not matter, but especially once you
> > > > > start depending on additional resources this would come in handy.
> > > > >
> > > > > Adding Rob and the device tree mailing list for feedback on this idea.
> > > >
> > > > Took some time to find this thread, but something around this was
> > > > suggested by Rafael earlier.
> > > >
> > > > https://lore.kernel.org/lkml/8139001.Q4eV8YG1Il@vostro.rjw.lan/
> > >
> > > I gave this a try and came up with the following:
> > >
> > > --- >8 ---
> > > diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > > index f4ede86e32b4..e4462f95f0b3 100644
> > > --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > > +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
> > > @@ -1764,6 +1764,9 @@ bpmp_thermal: thermal {
> > >         };
> > >
> > >         cpus {
> > > +               compatible = "nvidia,tegra194-ccplex";
> > > +               nvidia,bpmp = <&bpmp>;
> >
> > Is there more than one BPMP? If not, you don't need this. Just look up
> > the node by compatible.
>
> There are no SoCs currently that need to differentiate between multiple
> BPMPs, so yes, it would be possible to look up the node by compatible.
> But we also used to assume that PCs would only ever come with a single
> GPU or audio card, and that has always caused a lot of cleanup work
> when it turned out to no longer be true.

Job security. ;)

> Also, we already have a couple of devices referencing the BPMP by
> phandle like this, so having this in a CCPLEX node would keep things
> consistent.
>
> One of the reasons why we initially did it this way was also so that we
> could make the dependencies explicit within device tree. If we look up
> by compatible string, then the driver is the only one with the knowledge
> about where to get at it. If we have the explicit reference we at least
> have a chance of determining the dependency by just looking at the
> device tree.

This case probably makes sense, but then driver dependencies can
evolve and you'd be updating the DT for every dependency. There's just
this general mindset that a driver can't look at the DT outside of its
own node.

> > >                 #address-cells = <1>;
> > >                 #size-cells = <0>;
> > >
> > > --- >8 ---
> > >
> > > Now I can do something roughly like this, although I have a more complete
> > > patch locally that also gets rid of all the global variables because we
> > > now actually have a struct platform_device that we can anchor everything
> > > at:
> > >
> > > --- >8 ---
> > > static const struct of_device_id tegra194_cpufreq_of_match[] = {
> > >         { .compatible = "nvidia,tegra194-ccplex", },
> > >         { /* sentinel */ }
> > > };
> > > MODULE_DEVICE_TABLE(of, tegra194_cpufreq_of_match);
> > >
> > > static struct platform_driver tegra194_ccplex_driver = {
> > >         .driver = {
> > >                 .name = "tegra194-cpufreq",
> > >                 .of_match_table = tegra194_cpufreq_of_match,
> > >         },
> > >         .probe = tegra194_cpufreq_probe,
> > >         .remove = tegra194_cpufreq_remove,
> > > };
> > > module_platform_driver(tegra194_ccplex_driver);
> > > --- >8 ---
> > >
> > > I don't think that's exactly what Rafael (Cc'ed) had in mind, since the
> > > above thread seems to have mostly talked about binding a driver to each
> > > individual CPU.
> > >
> > > But this seems a lot better than having to instantiate a device from
> > > scratch just so that a driver can bind to it and it allows additional
> > > properties to be associated with the CCPLEX device.
> >
> > What additional properties? A continual stream of properties added 1
> > by 1 would negatively affect my opinion of this.
>
> I don't expect there would be many. I think there's an earlier
> generation of Tegra that requires a regulator and I can imagine that's
> pretty common. But other than that I would expect this to be a fairly
> narrow set of properties.
>
> > > Rob, any thoughts on this from a device tree point of view? The /cpus
> > > bindings don't mention the compatible property, but there doesn't seem
> > > to be anything in the bindings that would prohibit its use.
> >
> > What happens when you have more than one CPU-related driver in
> > addition to cpufreq? You may still end up creating child platform
> > devices, and then you've gained very little.
>
> That's only if you absolutely want to stick with the "one driver per
> subsystem" model. I personally think that's completely obsolete these
> days. If you have a CPU complex device that can do both CPU frequency
> scaling and put the CPU into idle states, for example, then there is
> really no reason to artificially split that into two separate drivers
> just to match the subsystems that we have.
>
> Most subsystems that I've come across work just fine if a single driver
> registers with multiple subsystems.

Yes exactly. If only everyone thought this way...

> I also know that some people like it better when things are nicely split
> up into multiple drivers. But I really don't see how that simplifies
> things. In fact in my opinion that makes things only more complicated
> because you have additional boilerplate and then you need to be extra
> careful about how these different drivers are ordered, and you need to
> take extra precautions when sharing things like clocks and register
> regions.

I just cleaned up this exact mess with VExpress drivers...

It's just a constant issue to deal with.

> > You could solve this without DT changes. You can bind on node names.
> > The driver probe can then check the parent compatible and return if
> > not matching. I'm not sure if you could get module auto loading to
> > work in that case. It would have to be based on the root compatible
> > (rather than the driver match table) and be able to load multiple
> > matching modules.
>
> That sounds like it would get very complicated for something this
> simple. Having a compatible string in /cpus seemed like the most logical
> option because it would basically just work out of the box and the same
> way we're used to from other devices.

That's also why I get the node per driver...


That said, I'm fine with adding the compatible. I hope I don't regret it.

Rob

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2020-05-20 16:21 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-03 17:32 [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data Sumit Gupta
2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 2/3] cpufreq: Add Tegra194 cpufreq driver Sumit Gupta
2019-12-04  5:40   ` Viresh Kumar
2019-12-04 10:55     ` sumitg
2019-12-04 11:27       ` Viresh Kumar
2019-12-04 13:57         ` Dmitry Osipenko
2019-12-05  2:51           ` Viresh Kumar
2019-12-05 12:55             ` Dmitry Osipenko
2020-03-25 23:59         ` sumitg
2019-12-04 13:59   ` Dmitry Osipenko
2019-12-05 14:15   ` Dmitry Osipenko
2020-03-26 11:50   ` Viresh Kumar
2020-04-04 18:38     ` sumitg
2020-04-06  2:55       ` Viresh Kumar
2020-04-07 18:18         ` sumitg
2020-04-08  5:53           ` Viresh Kumar
2020-04-08 11:24             ` sumitg
2020-04-09  7:44               ` Viresh Kumar
2020-04-09 11:21                 ` Sumit Gupta
2020-04-13  6:21                   ` Viresh Kumar
2020-04-13 12:20                     ` Sumit Gupta
2020-04-14  5:45                       ` Viresh Kumar
2020-04-15 11:25                         ` Sumit Gupta
2020-04-16  3:37                           ` Viresh Kumar
2020-04-16  7:06                             ` Sumit Gupta
2019-12-03 17:32 ` [TEGRA194_CPUFREQ Patch 3/3] arm64: defconfig: Enable CONFIG_ARM_TEGRA194_CPUFREQ Sumit Gupta
2019-12-03 17:42 ` [TEGRA194_CPUFREQ Patch 1/3] firmware: tegra: adding function to get BPMP data Thierry Reding
2019-12-04  8:45   ` Mikko Perttunen
2019-12-04  9:17     ` Viresh Kumar
2019-12-04  9:33       ` Thierry Reding
2019-12-04  9:51         ` Viresh Kumar
2020-04-07 10:05           ` Thierry Reding
2020-04-27  7:18             ` Thierry Reding
2020-04-29  8:21               ` Sumit Gupta
2020-05-06 16:58               ` Thierry Reding
2020-05-20 14:43             ` Rob Herring
2020-05-20 15:38               ` Thierry Reding
2020-05-20 16:21                 ` Rob Herring
2019-12-04 10:21       ` Mikko Perttunen
2019-12-04 10:26         ` Viresh Kumar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).