Linux-PM Archive on lore.kernel.org
 help / color / Atom feed
From: Chanwoo Choi <cw00.choi@samsung.com>
To: Leonard Crestez <leonard.crestez@nxp.com>,
	MyungJoo Ham <myungjoo.ham@samsung.com>,
	Kyungmin Park <kyungmin.park@samsung.com>,
	Rob Herring <robh+dt@kernel.org>
Cc: "Stephen Boyd" <sboyd@kernel.org>,
	"Rafael J. Wysocki" <rjw@rjwysocki.net>,
	"Shawn Guo" <shawnguo@kernel.org>,
	"Mark Rutland" <mark.rutland@arm.com>,
	"Michael Turquette" <mturquette@baylibre.com>,
	"Artur Świgoń" <a.swigon@partner.samsung.com>,
	"Saravana Kannan" <saravanak@google.com>,
	"Angus Ainslie" <angus@akkea.ca>,
	"Martin Kepplinger" <martink@posteo.de>,
	"Matthias Kaehlcke" <mka@chromium.org>,
	"Krzysztof Kozlowski" <krzk@kernel.org>,
	"Alexandre Bailon" <abailon@baylibre.com>,
	"Georgi Djakov" <georgi.djakov@linaro.org>,
	"Aisheng Dong" <aisheng.dong@nxp.com>,
	"Abel Vesa" <abel.vesa@nxp.com>, "Jacky Bai" <ping.bai@nxp.com>,
	"Anson Huang" <anson.huang@nxp.com>,
	"Fabio Estevam" <fabio.estevam@nxp.com>,
	"Viresh Kumar" <viresh.kumar@linaro.org>,
	"devicetree@vger.kernel.org" <devicetree@vger.kernel.org>,
	"linux-pm@vger.kernel.org" <linux-pm@vger.kernel.org>,
	"linux-clk@vger.kernel.org" <linux-clk@vger.kernel.org>,
	dl-linux-imx <linux-imx@nxp.com>,
	"kernel@pengutronix.de" <kernel@pengutronix.de>,
	"linux-arm-kernel@lists.infradead.org"
	<linux-arm-kernel@lists.infradead.org>
Subject: Re: [PATCH v4 5/6] PM / devfreq: imx8m-ddrc: Measure bandwidth with perf
Date: Wed, 13 Nov 2019 10:43:40 +0900
Message-ID: <5c5d4237-c8f1-f59e-272e-84d2a8528054@samsung.com> (raw)
In-Reply-To: <VI1PR04MB702331A22024C6EDB57AC840EE770@VI1PR04MB7023.eurprd04.prod.outlook.com>

Hi Leonard,

On 11/12/19 10:17 PM, Leonard Crestez wrote:
> On 11.11.2019 07:13, Chanwoo Choi wrote:
>> Hi Leonard,
>>
>> On 11/9/19 7:39 AM, Leonard Crestez wrote:
>>> The imx8m ddrc has a performance monitoring block attached which can
>>> be used to measure bandwidth usage and automatically adjust frequency.
>>>
>>> There is already a perf driver for that block so instead of implementing
>>> a devfreq events driver use the in-kernel perf API to implement
>>> get_dev_status directly.
>>>
>>> Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
>>> ---
>>>   drivers/devfreq/imx8m-ddrc.c | 153 +++++++++++++++++++++++++++++++++++
>>>   1 file changed, 153 insertions(+)
>>>
>>> diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c
>>> index 51903fee21a7..6372191f72d7 100644
>>> --- a/drivers/devfreq/imx8m-ddrc.c
>>> +++ b/drivers/devfreq/imx8m-ddrc.c
>>> @@ -11,10 +11,13 @@
>>>   #include <linux/pm_opp.h>
>>>   #include <linux/clk.h>
>>>   #include <linux/clk-provider.h>
>>>   #include <linux/arm-smccc.h>
>>>   
>>> +#include <asm/perf_event.h>
>>> +#include <linux/perf_event.h>
>>> +
>>>   #define IMX_SIP_DDR_DVFS			0xc2000004
>>>   
>>>   /* Values starting from 0 switch to specific frequency */
>>>   #define IMX_SIP_DDR_FREQ_SET_HIGH		0x00
>>>   
>>> @@ -78,10 +81,22 @@ struct imx8m_ddrc {
>>>   	struct clk *dram_alt;
>>>   	struct clk *dram_apb;
>>>   
>>>   	int freq_count;
>>>   	struct imx8m_ddrc_freq freq_table[IMX8M_DDRC_MAX_FREQ_COUNT];
>>> +
>>> +	/* For measuring load with perf events: */
>>> +	struct platform_device *pmu_pdev;
>>> +	struct pmu *pmu;
>>> +
>>> +	struct perf_event_attr rd_event_attr;
>>> +	struct perf_event_attr wr_event_attr;
>>> +	struct perf_event *rd_event;
>>> +	struct perf_event *wr_event;
>>> +
>>> +	u64 last_rd_val, last_rd_ena, last_rd_run;
>>> +	u64 last_wr_val, last_wr_ena, last_wr_run;
>>>   };
>>>   
>>>   static struct imx8m_ddrc_freq *imx8m_ddrc_find_freq(struct imx8m_ddrc *priv,
>>>   						    unsigned long rate)
>>>   {
>>> @@ -245,10 +260,131 @@ static int imx8m_ddrc_get_cur_freq(struct device *dev, unsigned long *freq)
>>>   	*freq = clk_get_rate(priv->dram_core);
>>>   
>>>   	return 0;
>>>   }
>>>   
>>> +static int imx8m_ddrc_get_dev_status(struct device *dev,
>>> +				     struct devfreq_dev_status *stat)
>>> +{
>>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>> +
>>> +	stat->current_frequency = clk_get_rate(priv->dram_core);
>>> +
>>> +	if (priv->rd_event && priv->wr_event) {
>>> +		u64 rd_delta, rd_val, rd_ena, rd_run;
>>> +		u64 wr_delta, wr_val, wr_ena, wr_run;
>>> +
>>> +		rd_val = perf_event_read_value(priv->rd_event,
>>> +					       &rd_ena, &rd_run);
>>> +		wr_val = perf_event_read_value(priv->wr_event,
>>> +					       &wr_ena, &wr_run);
>>> +
>>> +		rd_delta = (rd_val - priv->last_rd_val) *
>>> +			   (rd_ena - priv->last_rd_ena);
>>> +		do_div(rd_delta, rd_run - priv->last_rd_run);
>>> +		priv->last_rd_val = rd_val;
>>> +		priv->last_rd_ena = rd_ena;
>>> +		priv->last_rd_run = rd_run;
>>> +
>>> +		wr_delta = (wr_val - priv->last_wr_val) *
>>> +			   (wr_ena - priv->last_wr_ena);
>>> +		do_div(wr_delta, wr_run - priv->last_wr_run);
>>> +		priv->last_wr_val = wr_val;
>>> +		priv->last_wr_ena = wr_ena;
>>> +		priv->last_wr_run = wr_run;
>>> +
>>> +		/* magic numbers, possibly wrong */
>>> +		stat->busy_time = 4 * (rd_delta + wr_delta);
>>> +		stat->total_time = stat->current_frequency;
>>> +	} else {
>>> +		stat->busy_time = 0;
>>> +		stat->total_time = 0;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int imx8m_ddrc_perf_disable(struct imx8m_ddrc *priv)
>>> +{
>>> +	/* release and set to NULL */
>>> +	if (!IS_ERR_OR_NULL(priv->rd_event))
>>> +		perf_event_release_kernel(priv->rd_event);
>>> +	if (!IS_ERR_OR_NULL(priv->wr_event))
>>> +		perf_event_release_kernel(priv->wr_event);
>>> +	priv->rd_event = NULL;
>>> +	priv->wr_event = NULL;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int imx8m_ddrc_perf_enable(struct imx8m_ddrc *priv)
>>
>> Actually, this function have to call the just function for enabling
>> the bandwidth monitoring instead of writing the detailed something.
>> It looks like that you move the part of the different device driver into here.
> 
> This is the code for enabling bandwith monitoring: it creates perf 
> counters using in-kernel API. The perf api doesn't seem to expose 
> anything else to enable/disable the counter after creation and honestly 
> just create/destroy seems fine.
> 
> As far as I can tell bandwidth monitoring in devfreq is always enabled 
> on probe anyway, no matter which governor is in use. It would be nice if 
> devfreq drivers could receive a callback and dynamically acquire/release 
> monitoring resources only when the ondemand governor is in used.

As you commented, it doesn't matter to keep the enabling state of perf.
Just, I want to say that I think that it is not proper to access
the different device driver directly without any standard subsystem interface.

> 
> Until then this driver will permanently allocate 2 (out of 3) counters 
> in ddr pmu hardware.
> 
>>> +{
>>> +	int ret;
>>> +
>>> +	priv->rd_event_attr.size = sizeof(priv->rd_event_attr);
>>> +	priv->rd_event_attr.type = priv->pmu->type;
>>> +	priv->rd_event_attr.config = 0x2a;
>>> +
>>> +	priv->rd_event = perf_event_create_kernel_counter(
>>> +			&priv->rd_event_attr, 0, NULL, NULL, NULL);
>>> +	if (IS_ERR(priv->rd_event)) {
>>> +		ret = PTR_ERR(priv->rd_event);
>>> +		goto err;
>>> +	}
>>> +
>>> +	priv->wr_event_attr.size = sizeof(priv->wr_event_attr);
>>> +	priv->wr_event_attr.type = priv->pmu->type;
>>> +	priv->wr_event_attr.config = 0x2b;
>>> +
>>> +	priv->wr_event = perf_event_create_kernel_counter(
>>> +			&priv->wr_event_attr, 0, NULL, NULL, NULL);
>>> +	if (IS_ERR(priv->wr_event)) {
>>> +		ret = PTR_ERR(priv->wr_event);
>>> +		goto err;
>>> +	}
>>> +
>>> +	return 0;
>>> +
>>> +err:
>>> +	imx8m_ddrc_perf_disable(priv);
>>> +	return ret;
>>> +}
>>> +
>>> +static int imx8m_ddrc_init_events(struct device *dev,
>>> +				  struct device_node *events_node)
>>
>> ditto.
>>
>>> +{
>>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>> +	struct device_driver *driver;
>>> +
>>> +	/*
>>> +	 * We need pmu->type for perf_event_attr but there is no API for
>>> +	 * mapping device_node to pmu. Fetch private data for imx-ddr-pmu and
>>> +	 * cast that to a struct pmu instead.
>>> +	 */
>>> +	priv->pmu_pdev = of_find_device_by_node(events_node);
>>> +	if (!priv->pmu_pdev)
>>> +		return -EPROBE_DEFER;
>>> +	driver = priv->pmu_pdev->dev.driver;
>>> +	if (!driver)
>>> +		return -EPROBE_DEFER;
>>> +	if (strcmp(driver->name, "imx-ddr-pmu")) {
>>> +		dev_warn(dev, "devfreq-events node %pOF has unexpected driver %s\n",
>>> +				events_node, driver->name);
>>> +		return -ENODEV;
>>> +	}
>>> +
>>> +	priv->pmu = platform_get_drvdata(priv->pmu_pdev);
>>
>> It seems that you access the different device driver without
>> any standard interface from some linux kernel subsystem.
>>
>> For the communication or control between different device drivers,
>> we have to use the standard interface instead of direct access or call.
>> I think that it make it too tightly coupled driver between them.
>>
>> So, I developed the devfreq-event subsystem for this reason
>> in order to remove the non-standard direct access to other device driver.
>>
>> Unfortunately, I can't agree this approach. I don't prefer to use
>> the direct access of other device driver(imx-ddr-pmu). You need to
>> use standard interface provided from subsystem. or You need to make
>> the new device driver with devfreq-event subsystem.
> 
> This could be cleaned-up by adding a new API to "fetch struct pmu* by 
> struct device_node*" as available for many other subsystems.

OK. I'll expect the next version with keeping the rule without
any direct access between two device drivers.

 The perf 
> api is otherwise standard/generic and has a few other in-kernel users.

> 
> The perf driver for DDR PMU is already functional and useful for 
> profiling and reusing it seem very worthwhile. If you're suggesting I 
> implemented an unrelated "devfreq-event" driver then how would it get 
> probed? There's only one PMU node in DT.

No, I don't force to develop the new devfreq-event instead of exiting perf driver.
As I said already,I think that it is not proper to access the different device driver
directly without any standard subsystem interface.

The following style access is not safe. Any device driver might change
the platform data of the other device driver. 
	priv->pmu = platform_get_drvdata(priv->pmu_pdev);

If you support this patch with standard subsystem interface, I'm OK.
Just, I don't want to access the data of different device driver.

> 
> I wouldn't mind to delay the monitoring part into a second series.
> 
>>> +	if (!priv->pmu) {
>>> +		dev_err(dev, "devfreq-events device missing private data\n");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	dev_dbg(dev, "events from pmu %s\n", priv->pmu->name);
>>> +
>>> +	return imx8m_ddrc_perf_enable(priv);
>>> +}
>>> +
>>>   static int imx8m_ddrc_init_freq_info(struct device *dev)
>>>   {
>>>   	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>>   	struct arm_smccc_res res;
>>>   	int index;
>>> @@ -328,17 +464,23 @@ static int imx8m_ddrc_check_opps(struct device *dev)
>>>   	return 0;
>>>   }
>>>   
>>>   static void imx8m_ddrc_exit(struct device *dev)
>>>   {
>>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>> +
>>> +	imx8m_ddrc_perf_disable(priv);
>>> +	platform_device_put(priv->pmu_pdev);
>>> +
>>>   	dev_pm_opp_of_remove_table(dev);
>>>   }
>>>   
>>>   static int imx8m_ddrc_probe(struct platform_device *pdev)
>>>   {
>>>   	struct device *dev = &pdev->dev;
>>>   	struct imx8m_ddrc *priv;
>>> +	struct device_node *events_node;
>>>   	const char *gov = DEVFREQ_GOV_USERSPACE;
>>>   	int ret;
>>>   
>>>   	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
>>>   	if (!priv)
>>> @@ -350,10 +492,19 @@ static int imx8m_ddrc_probe(struct platform_device *pdev)
>>>   	if (ret) {
>>>   		dev_err(dev, "failed to init firmware freq info: %d\n", ret);
>>>   		return ret;
>>>   	}
>>>   
>>> +	events_node = of_parse_phandle(dev->of_node, "devfreq-events", 0);
>>> +	if (events_node) {
>>> +		ret = imx8m_ddrc_init_events(dev, events_node);
>>> +		of_node_put(events_node);
>>> +		if (ret)
>>> +			goto err;
>>> +		gov = DEVFREQ_GOV_SIMPLE_ONDEMAND;
>>> +	}
>>> +
>>>   	priv->dram_core = devm_clk_get(dev, "core");
>>>   	priv->dram_pll = devm_clk_get(dev, "pll");
>>>   	priv->dram_alt = devm_clk_get(dev, "alt");
>>>   	priv->dram_apb = devm_clk_get(dev, "apb");
>>>   	if (IS_ERR(priv->dram_core) ||
>>> @@ -390,10 +541,12 @@ static int imx8m_ddrc_probe(struct platform_device *pdev)
>>>   	}
>>>   
>>>   	return 0;
>>>   
>>>   err:
>>> +	imx8m_ddrc_perf_disable(priv);
>>> +	platform_device_put(priv->pmu_pdev);
>>>   	dev_pm_opp_of_remove_table(dev);
>>>   	return ret;
>>>   }
>>>   
>>>   static const struct of_device_id imx8m_ddrc_of_match[] = {
> 
> 


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

  reply index

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-08 22:39 [PATCH v4 0/6] PM / devfreq: Add dynamic scaling for imx8m ddr controller Leonard Crestez
2019-11-08 22:39 ` [PATCH v4 1/6] clk: imx8m: Set CLK_GET_RATE_NOCACHE on dram clocks Leonard Crestez
2019-11-12 11:18   ` Abel Vesa
2019-11-12 13:43     ` Leonard Crestez
2019-11-12 15:10       ` Abel Vesa
2019-11-08 22:39 ` [PATCH v4 2/6] clk: imx: Mark dram pll on 8mm and 8mn with CLK_GET_RATE_NOCACHE Leonard Crestez
2019-11-12 11:18   ` Abel Vesa
2019-11-08 22:39 ` [PATCH v4 3/6] dt-bindings: memory: Add bindings for imx8m ddr controller Leonard Crestez
2019-11-08 22:39 ` [PATCH v4 4/6] PM / devfreq: Add dynamic scaling " Leonard Crestez
2019-11-11  3:23   ` Chanwoo Choi
2019-11-11 17:23     ` Leonard Crestez
2019-11-12  1:00       ` Chanwoo Choi
2019-11-12 14:47         ` Leonard Crestez
2019-11-08 22:39 ` [PATCH v4 5/6] PM / devfreq: imx8m-ddrc: Measure bandwidth with perf Leonard Crestez
2019-11-11  5:18   ` Chanwoo Choi
2019-11-12 13:17     ` Leonard Crestez
2019-11-13  1:43       ` Chanwoo Choi [this message]
2019-11-08 22:39 ` [PATCH v4 6/6] arm64: dts: imx8m: Add ddr controller nodes Leonard Crestez

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5c5d4237-c8f1-f59e-272e-84d2a8528054@samsung.com \
    --to=cw00.choi@samsung.com \
    --cc=a.swigon@partner.samsung.com \
    --cc=abailon@baylibre.com \
    --cc=abel.vesa@nxp.com \
    --cc=aisheng.dong@nxp.com \
    --cc=angus@akkea.ca \
    --cc=anson.huang@nxp.com \
    --cc=devicetree@vger.kernel.org \
    --cc=fabio.estevam@nxp.com \
    --cc=georgi.djakov@linaro.org \
    --cc=kernel@pengutronix.de \
    --cc=krzk@kernel.org \
    --cc=kyungmin.park@samsung.com \
    --cc=leonard.crestez@nxp.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-clk@vger.kernel.org \
    --cc=linux-imx@nxp.com \
    --cc=linux-pm@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=martink@posteo.de \
    --cc=mka@chromium.org \
    --cc=mturquette@baylibre.com \
    --cc=myungjoo.ham@samsung.com \
    --cc=ping.bai@nxp.com \
    --cc=rjw@rjwysocki.net \
    --cc=robh+dt@kernel.org \
    --cc=saravanak@google.com \
    --cc=sboyd@kernel.org \
    --cc=shawnguo@kernel.org \
    --cc=viresh.kumar@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-PM Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-pm/0 linux-pm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-pm linux-pm/ https://lore.kernel.org/linux-pm \
		linux-pm@vger.kernel.org
	public-inbox-index linux-pm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-pm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git