linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Krzysztof Kozlowski <krzk@kernel.org>
To: Lukasz Luba <l.luba@partner.samsung.com>
Cc: devicetree@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-pm@vger.kernel.org, linux-samsung-soc@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org, b.zolnierkie@samsung.com,
	kgene@kernel.org, mark.rutland@arm.com, cw00.choi@samsung.com,
	kyungmin.park@samsung.com, m.szyprowski@samsung.com,
	s.nawrocki@samsung.com, myungjoo.ham@samsung.com,
	robh+dt@kernel.org, willy.mh.wolff.ml@gmail.com
Subject: Re: [PATCH 3/3] memory: samsung: exynos5422-dmc: Add support for interrupt from performance counters
Date: Fri, 27 Sep 2019 11:19:20 +0200	[thread overview]
Message-ID: <20190927091920.GB19131@pi3> (raw)
In-Reply-To: <20190925161813.21117-4-l.luba@partner.samsung.com>

On Wed, Sep 25, 2019 at 06:18:13PM +0200, Lukasz Luba wrote:
> Introduce a new interrupt driven mechanism for managing speed of the
> memory controller. The interrupts are generated due to performance
> counters overflow. The performance counters might track memory reads,
> writes, transfers, page misses, etc. In the basic algorithm tracking
> read transfers and calculating memory pressure should be enough to
> skip polling mode in devfreq.
> 
> Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
> ---
>  drivers/memory/samsung/exynos5422-dmc.c | 297 ++++++++++++++++++++++--
>  1 file changed, 272 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c
> index 0fe5f2186139..86e1844b97ef 100644
> --- a/drivers/memory/samsung/exynos5422-dmc.c
> +++ b/drivers/memory/samsung/exynos5422-dmc.c
> @@ -8,6 +8,7 @@
>  #include <linux/devfreq.h>
>  #include <linux/devfreq-event.h>
>  #include <linux/device.h>
> +#include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/mfd/syscon.h>
>  #include <linux/module.h>
> @@ -35,6 +36,29 @@
>  #define USE_BPLL_TIMINGS			(0)
>  #define EXYNOS5_AREF_NORMAL			(0x2e)
>  
> +#define DREX_PPCCLKCON		(0x0130)
> +#define DREX_PEREV2CONFIG	(0x013c)
> +#define DREX_PMNC_PPC		(0xE000)
> +#define DREX_CNTENS_PPC		(0xE010)
> +#define DREX_CNTENC_PPC		(0xE020)
> +#define DREX_INTENS_PPC		(0xE030)
> +#define DREX_INTENC_PPC		(0xE040)
> +#define DREX_FLAG_PPC		(0xE050)
> +#define DREX_PMCNT2_PPC		(0xE130)
> +
> +#define CC_RESET		BIT(2)
> +#define PPC_COUNTER_RESET	BIT(1)
> +#define PPC_ENABLE		BIT(0)
> +#define PEREV_CLK_EN		BIT(0)
> +#define PERF_CNT2		BIT(2)
> +#define PERF_CCNT		BIT(31)

Describe to which registers these bitfields are applicable.

> +
> +#define READ_TRANSFER_CH0	(0x6d)
> +#define READ_TRANSFER_CH1	(0x6f)

The same. Otherwise they all look like some generic constants which is
not true.

> +
> +#define PERF_COUNTER_START_VALUE 0xff000000
> +#define PERF_EVENT_UP_DOWN_THRESHOLD 900000000ULL
> +
>  /**
>   * struct dmc_opp_table - Operating level desciption
>   *
> @@ -85,6 +109,9 @@ struct exynos5_dmc {
>  	struct clk *mout_mx_mspll_ccore_phy;
>  	struct devfreq_event_dev **counter;
>  	int num_counters;
> +	u64 last_overflow_ts[2];
> +	unsigned long load, total;

One member per line.  This decreases readability.

> +	bool in_irq_mode;
>  };
>  
>  #define TIMING_FIELD(t_name, t_bit_beg, t_bit_end) \
> @@ -653,6 +680,167 @@ static int exynos5_counters_get(struct exynos5_dmc *dmc,
>  	return 0;
>  }
>  
> +/**
> + * exynos5_dmc_start_perf_events() - Setup and start performance event counters
> + * @dmc:	device for which the counters are going to be checked
> + * @beg_value:	initial value for the counter
> + *
> + * Function which enables needed counters, interrupts and sets initial values
> + * then starts the counters.
> + */
> +static void exynos5_dmc_start_perf_events(struct exynos5_dmc *dmc,
> +					  u32 beg_value)
> +{
> +	/* Enable interrupts for counter 2 */
> +	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENS_PPC);
> +	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENS_PPC);

Blank line.

> +	/* Enable counter 2 and CCNT  */
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENS_PPC);
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENS_PPC);

Blank line.

> +	/* Clear overflow flag for all counters */
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);

Blank line.

> +	/* Reset all counters */
> +	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi0 + DREX_PMNC_PPC);
> +	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi1 + DREX_PMNC_PPC);

Blank line.

> +	/*
> +	 * Set start value for the counters, the number of samples that
> +	 * will be gathered is calculated as: 0xffffffff - beg_value
> +	 */
> +	writel(beg_value, dmc->base_drexi0 + DREX_PMCNT2_PPC);
> +	writel(beg_value, dmc->base_drexi1 + DREX_PMCNT2_PPC);

Blank line.

> +	/* Start all counters */
> +	writel(PPC_ENABLE, dmc->base_drexi0 + DREX_PMNC_PPC);
> +	writel(PPC_ENABLE, dmc->base_drexi1 + DREX_PMNC_PPC);
> +}
> +
> +/**
> + * exynos5_dmc_perf_events_calc() - Calculate utilization
> + * @dmc:	device for which the counters are going to be checked
> + * @diff_ts:	time between last interrupt and current one
> + *
> + * Function which calculates needed utilization for the devfreq governor.
> + * It prepares values for 'busy_time' and 'total_time' based on elapsed time
> + * between interrupts, which approximates utilization.
> + */
> +static void exynos5_dmc_perf_events_calc(struct exynos5_dmc *dmc, u64 diff_ts)
> +{
> +	/*
> +	 * This is a simple algorithm for managing traffic on DMC.
> +	 * When there is almost no load the counters overflow every 4s,
> +	 * no mater the DMC frequency.
> +	 * The high load might be approximated using linear function.
> +	 * Knowing that, simple calculation can provide 'busy_time' and
> +	 * 'total_time' to the devfreq governor which picks up target
> +	 * frequency.
> +	 * We want a fast ramp up and slow decay in frequency change function.
> +	 */
> +	if (diff_ts < PERF_EVENT_UP_DOWN_THRESHOLD) {
> +		/*
> +		 * Set higher utilization for the simple_ondemand governor.
> +		 * The governor should increase the frequency of the DMC.
> +		 */
> +		dmc->load = 70;
> +		dmc->total = 100;
> +	} else {
> +		/*
> +		 * Set low utilization for the simple_ondemand governor.
> +		 * The governor should decrease the frequency of the DMC.
> +		 */
> +		dmc->load = 35;
> +		dmc->total = 100;
> +	}
> +
> +	dev_dbg(dmc->dev, "diff_ts=%llu\n", diff_ts);
> +}
> +
> +/**
> + * exynos5_dmc_perf_events_check() - Checks the status of the counters
> + * @dmc:	device for which the counters are going to be checked
> + *
> + * Function which is called from threaded IRQ to check the counters state
> + * and to call approximation for the needed utilization.
> + */
> +static void exynos5_dmc_perf_events_check(struct exynos5_dmc *dmc)
> +{
> +	u32 val;
> +	u64 diff_ts, ts;
> +
> +	ts = ktime_get_ns();
> +
> +	/* Stop all counters */
> +	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
> +	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
> +
> +	/* Check the source in interrupt flag registers (which channel) */
> +	val = readl(dmc->base_drexi0 + DREX_FLAG_PPC);
> +	if (val) {
> +		diff_ts = ts - dmc->last_overflow_ts[0];
> +		dmc->last_overflow_ts[0] = ts;
> +		dev_dbg(dmc->dev, "drex0 0xE050 val= 0x%08x\n",  val);
> +	} else {
> +		val = readl(dmc->base_drexi1 + DREX_FLAG_PPC);
> +		diff_ts = ts - dmc->last_overflow_ts[1];
> +		dmc->last_overflow_ts[1] = ts;
> +		dev_dbg(dmc->dev, "drex1 0xE050 val= 0x%08x\n",  val);
> +	}
> +
> +	exynos5_dmc_perf_events_calc(dmc, diff_ts);
> +
> +	exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
> +}
> +
> +/**
> + * exynos5_dmc_enable_perf_events() - Enable performance events
> + * @dmc:	device for which the counters are going to be checked
> + *
> + * Function which is setup needed environment and enables counters.
> + */
> +static void exynos5_dmc_enable_perf_events(struct exynos5_dmc *dmc)
> +{
> +	u64 ts;
> +
> +	/* Enable Performance Event Clock */
> +	writel(PEREV_CLK_EN, dmc->base_drexi0 + DREX_PPCCLKCON);
> +	writel(PEREV_CLK_EN, dmc->base_drexi1 + DREX_PPCCLKCON);
> +
> +	/* Select read transfers as performance event2 */
> +	writel(READ_TRANSFER_CH0, dmc->base_drexi0 + DREX_PEREV2CONFIG);
> +	writel(READ_TRANSFER_CH1, dmc->base_drexi1 + DREX_PEREV2CONFIG);
> +
> +	dmc->in_irq_mode = 1;

Move this outside, to the probe. Logically it belongs there.

> +
> +	ts = ktime_get_ns();
> +	dmc->last_overflow_ts[0] = ts;
> +	dmc->last_overflow_ts[1] = ts;
> +
> +	/* Devfreq shouldn't be faster than initialization, play safe though. */
> +	dmc->load = 99;
> +	dmc->total = 100;
> +}
> +
> +/**
> + * exynos5_dmc_disable_perf_events() - Disable performance events
> + * @dmc:	device for which the counters are going to be checked
> + *
> + * Function which stops, disables performance event counters and interrupts.
> + */
> +static void exynos5_dmc_disable_perf_events(struct exynos5_dmc *dmc)
> +{
> +	/* Stop all counters */
> +	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
> +	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);

Blank line here and later.

> +	/* Disable interrupts for counter 2 */
> +	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENC_PPC);
> +	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENC_PPC);
> +	/* Disable counter 2 and CCNT  */
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENC_PPC);
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENC_PPC);
> +	/* Clear overflow flag for all counters */
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
> +}
> +
>  /**
>   * exynos5_dmc_get_status() - Read current DMC performance statistics.
>   * @dev:	device for which the statistics are requested
> @@ -669,18 +857,24 @@ static int exynos5_dmc_get_status(struct device *dev,
>  	unsigned long load, total;
>  	int ret;
>  
> -	ret = exynos5_counters_get(dmc, &load, &total);
> -	if (ret < 0)
> -		return -EINVAL;
> +	if (dmc->in_irq_mode) {
> +		stat->current_frequency = dmc->curr_rate;
> +		stat->busy_time = dmc->load;
> +		stat->total_time = dmc->total;
> +	} else {
> +		ret = exynos5_counters_get(dmc, &load, &total);
> +		if (ret < 0)
> +			return -EINVAL;
>  
> -	/* To protect from overflow in calculation ratios, divide by 1024 */
> -	stat->busy_time = load >> 10;
> -	stat->total_time = total >> 10;
> +		/* To protect from overflow, divide by 1024 */
> +		stat->busy_time = load >> 10;
> +		stat->total_time = total >> 10;
>  
> -	ret = exynos5_counters_set_event(dmc);
> -	if (ret < 0) {
> -		dev_err(dev, "could not set event counter\n");
> -		return ret;
> +		ret = exynos5_counters_set_event(dmc);
> +		if (ret < 0) {
> +			dev_err(dev, "could not set event counter\n");
> +			return ret;
> +		}
>  	}
>  
>  	return 0;
> @@ -712,7 +906,6 @@ static int exynos5_dmc_get_cur_freq(struct device *dev, unsigned long *freq)
>   * It provides to the devfreq framework needed functions and polling period.
>   */
>  static struct devfreq_dev_profile exynos5_dmc_df_profile = {
> -	.polling_ms = 500,
>  	.target = exynos5_dmc_target,
>  	.get_dev_status = exynos5_dmc_get_status,
>  	.get_cur_freq = exynos5_dmc_get_cur_freq,
> @@ -1108,6 +1301,26 @@ static inline int exynos5_dmc_set_pause_on_switching(struct exynos5_dmc *dmc)
>  	return 0;
>  }
>  
> +static irqreturn_t dmc_irq_thread(int irq, void *priv)
> +{
> +	int res;
> +	struct exynos5_dmc *dmc = priv;
> +
> +	dev_dbg(dmc->dev, "irq thread handler\n");

Skip a debug in thread handler for memory. It can pollute your log (I
guess depending on workload).

> +
> +	mutex_lock(&dmc->df->lock);
> +
> +	exynos5_dmc_perf_events_check(dmc);
> +
> +	res = update_devfreq(dmc->df);
> +	if (res)
> +		dev_err(dmc->dev, "devfreq failed with %d\n", res);

dev_warn()

> +
> +	mutex_unlock(&dmc->df->lock);
> +
> +	return IRQ_HANDLED;
> +}
> +
>  /**
>   * exynos5_dmc_probe() - Probe function for the DMC driver
>   * @pdev:	platform device for which the driver is going to be initialized
> @@ -1125,6 +1338,7 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>  	struct device_node *np = dev->of_node;
>  	struct exynos5_dmc *dmc;
>  	struct resource *res;
> +	int irq;
>  
>  	dmc = devm_kzalloc(dev, sizeof(*dmc), GFP_KERNEL);
>  	if (!dmc)
> @@ -1172,24 +1386,48 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>  		goto remove_clocks;
>  	}
>  
> -	ret = exynos5_performance_counters_init(dmc);
> -	if (ret) {
> -		dev_warn(dev, "couldn't probe performance counters\n");
> -		goto remove_clocks;
> -	}
> -
>  	ret = exynos5_dmc_set_pause_on_switching(dmc);
>  	if (ret) {
>  		dev_warn(dev, "couldn't get access to PAUSE register\n");
>  		goto err_devfreq_add;

This is wrong now, I think.

>  	}
>  
> -	/*
> -	 * Setup default thresholds for the devfreq governor.
> -	 * The values are chosen based on experiments.
> -	 */
> -	dmc->gov_data.upthreshold = 30;
> -	dmc->gov_data.downdifferential = 5;
> +	/* There is two modes in which the driver works: polling or IRQ */
> +	irq = platform_get_irq(pdev, 0);

You need to document it in bindings.

> +	if (irq < 0) {
> +		ret = exynos5_performance_counters_init(dmc);
> +		if (ret) {
> +			dev_warn(dev, "couldn't probe performance counters\n");
> +			goto remove_clocks;

Weird, previous error jump goes to err_devfreq_add. This goes to error
label which is narrower (less to cleanup).

Best regards,
Krzysztof

> +		}
> +
> +		/*
> +		 * Setup default thresholds for the devfreq governor.
> +		 * The values are chosen based on experiments.
> +		 */
> +		dmc->gov_data.upthreshold = 30;
> +		dmc->gov_data.downdifferential = 5;
> +
> +		exynos5_dmc_df_profile.polling_ms = 500;
> +	} else {
> +		ret = devm_request_threaded_irq(dev, irq, NULL,
> +						dmc_irq_thread, IRQF_ONESHOT,
> +						dev_name(dev), dmc);
> +		if (ret) {
> +			dev_err(dev, "couldn't grab IRQ\n");
> +			goto remove_clocks;
> +		}
> +
> +		/*
> +		 * Setup default thresholds for the devfreq governor.
> +		 * The values are chosen based on experiments.
> +		 */
> +		dmc->gov_data.upthreshold = 55;
> +		dmc->gov_data.downdifferential = 5;
> +
> +		exynos5_dmc_enable_perf_events(dmc);
> +	}
> +
>  
>  	dmc->df = devm_devfreq_add_device(dev, &exynos5_dmc_df_profile,
>  					  DEVFREQ_GOV_SIMPLE_ONDEMAND,
> @@ -1200,12 +1438,18 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>  		goto err_devfreq_add;
>  	}
>  
> +	if (dmc->in_irq_mode)
> +		exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
> +
>  	dev_info(dev, "DMC initialized\n");
>  
>  	return 0;
>  
>  err_devfreq_add:
> -	exynos5_counters_disable_edev(dmc);
> +	if (dmc->in_irq_mode)
> +		exynos5_dmc_disable_perf_events(dmc);
> +	else
> +		exynos5_counters_disable_edev(dmc);
>  remove_clocks:
>  	clk_disable_unprepare(dmc->mout_bpll);
>  	clk_disable_unprepare(dmc->fout_bpll);
> @@ -1225,7 +1469,10 @@ static int exynos5_dmc_remove(struct platform_device *pdev)
>  {
>  	struct exynos5_dmc *dmc = dev_get_drvdata(&pdev->dev);
>  
> -	exynos5_counters_disable_edev(dmc);
> +	if (dmc->in_irq_mode)
> +		exynos5_dmc_disable_perf_events(dmc);
> +	else
> +		exynos5_counters_disable_edev(dmc);
>  
>  	clk_disable_unprepare(dmc->mout_bpll);
>  	clk_disable_unprepare(dmc->fout_bpll);
> -- 
> 2.17.1
> 

  reply	other threads:[~2019-09-27  9:19 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20190925161841eucas1p12b3b798020b3493e9a4804d98b422f17@eucas1p1.samsung.com>
2019-09-25 16:18 ` [PATCH 0/3] Exynos5 DMC interrupt mode Lukasz Luba
     [not found]   ` <CGME20190925161842eucas1p271a9cf4f62b3d7af02c0a5d0d1eb9c4f@eucas1p2.samsung.com>
2019-09-25 16:18     ` [PATCH 1/3] ARM: dts: exynos: Add interrupt to DMC controller in Exynos5422 Lukasz Luba
2019-09-27  8:53       ` Krzysztof Kozlowski
2019-10-01 12:38         ` Lukasz Luba
     [not found]   ` <CGME20190925161843eucas1p1abaa1aaa20fcf5c55c9e52bb6a491317@eucas1p1.samsung.com>
2019-09-25 16:18     ` [PATCH 2/3] ARM: dts: exynos: map 0x10000 SFR instead of 0x100 in DMC Exynos5422 Lukasz Luba
     [not found]   ` <CGME20190925161844eucas1p2dc69a451c2d86fd7f4be2b33940f8d62@eucas1p2.samsung.com>
2019-09-25 16:18     ` [PATCH 3/3] memory: samsung: exynos5422-dmc: Add support for interrupt from performance counters Lukasz Luba
2019-09-27  9:19       ` Krzysztof Kozlowski [this message]
2019-10-01 10:11         ` Lukasz Luba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190927091920.GB19131@pi3 \
    --to=krzk@kernel.org \
    --cc=b.zolnierkie@samsung.com \
    --cc=cw00.choi@samsung.com \
    --cc=devicetree@vger.kernel.org \
    --cc=kgene@kernel.org \
    --cc=kyungmin.park@samsung.com \
    --cc=l.luba@partner.samsung.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=linux-samsung-soc@vger.kernel.org \
    --cc=m.szyprowski@samsung.com \
    --cc=mark.rutland@arm.com \
    --cc=myungjoo.ham@samsung.com \
    --cc=robh+dt@kernel.org \
    --cc=s.nawrocki@samsung.com \
    --cc=willy.mh.wolff.ml@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).