Re: [V4 2/3] interconnect: qcom: Add EPSS L3 support on SC7280

From: okukatla@codeaurora.org
To: Stephen Boyd <swboyd@chromium.org>
Cc: Andy Gross <agross@kernel.org>, Georgi Djakov <djakov@kernel.org>,
	bjorn.andersson@linaro.org, evgreen@google.com,
	georgi.djakov@linaro.org, linux-arm-msm@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org,
	seansw@qti.qualcomm.com, elder@linaro.org,
	linux-arm-msm-owner@vger.kernel.org
Subject: Re: [V4 2/3] interconnect: qcom: Add EPSS L3 support on SC7280
Date: Mon, 09 Aug 2021 22:15:31 +0530	[thread overview]
Message-ID: <b1e757d1f0001ade03180a37b6c2c97e@codeaurora.org> (raw)
In-Reply-To: <CAE-0n52Hvf_b_bLBz3d-Ts8VyVM_1ydgHud=6LA-kS5+0++oqQ@mail.gmail.com>

On 2021-07-09 04:51, Stephen Boyd wrote:
> Quoting Odelu Kukatla (2021-06-18 04:28:53)
>> diff --git a/drivers/interconnect/qcom/osm-l3.c 
>> b/drivers/interconnect/qcom/osm-l3.c
>> index 695f287..a8c0ee8 100644
>> --- a/drivers/interconnect/qcom/osm-l3.c
>> +++ b/drivers/interconnect/qcom/osm-l3.c
>> @@ -15,6 +15,7 @@
>>  #include <dt-bindings/interconnect/qcom,osm-l3.h>
>> 
>>  #include "sc7180.h"
>> +#include "sc7280.h"
>>  #include "sdm845.h"
>>  #include "sm8150.h"
>>  #include "sm8250.h"
>> @@ -32,17 +33,33 @@
>> 
>>  /* EPSS Register offsets */
>>  #define EPSS_LUT_ROW_SIZE              4
>> +#define EPSS_REG_L3_VOTE               0x90
>>  #define EPSS_REG_FREQ_LUT              0x100
>>  #define EPSS_REG_PERF_STATE            0x320
>> +#define EPSS_CORE_OFFSET               0x4
>> +#define EPSS_L3_VOTE_REG(base, cpu)\
>> +                       (((base) + EPSS_REG_L3_VOTE) +\
>> +                       ((cpu) * EPSS_CORE_OFFSET))
>> 
>> -#define OSM_L3_MAX_LINKS               1
>> +#define L3_DOMAIN_CNT          4
>> +#define L3_MAX_LINKS           9
>> 
>>  #define to_qcom_provider(_provider) \
>>         container_of(_provider, struct qcom_osm_l3_icc_provider, 
>> provider)
>> 
>> +/**
>> + * @domain_base: an array of base address for each clock domain
>> + * @max_state: max supported frequency level
>> + * @per_core_dcvs: flag used to indicate whether the frequency 
>> scaling
>> + * for each core is enabled
>> + * @reg_perf_state: requested frequency level
>> + * @lut_tables: an array of supported frequency levels
>> + * @provider: interconnect provider of this node
>> + */
>>  struct qcom_osm_l3_icc_provider {
>> -       void __iomem *base;
>> +       void __iomem *domain_base[L3_DOMAIN_CNT];
>>         unsigned int max_state;
>> +       bool per_core_dcvs;
>>         unsigned int reg_perf_state;
>>         unsigned long lut_tables[LUT_MAX_ENTRIES];
>>         struct icc_provider provider;
>> @@ -55,34 +72,41 @@ struct qcom_osm_l3_icc_provider {
>>   * @id: a unique node identifier
>>   * @num_links: the total number of @links
>>   * @buswidth: width of the interconnect between a node and the bus
>> + * @domain: clock domain of the cpu node
>> + * @cpu: cpu instance within its clock domain
>>   */
>>  struct qcom_icc_node {
>>         const char *name;
>> -       u16 links[OSM_L3_MAX_LINKS];
>> +       u16 links[L3_MAX_LINKS];
>>         u16 id;
>>         u16 num_links;
>>         u16 buswidth;
>> +       u16 domain;
>> +       int cpu;
> 
> unsigned int? Or is -1 intended for no cpu? If we keep int, please
> document -1 as special.
> 
Thanks, it needs to be unsigned. will fix this in v5.
>>  };
>> 
>>  struct qcom_icc_desc {
>>         const struct qcom_icc_node **nodes;
>>         size_t num_nodes;
>> +       bool per_core_dcvs;
>>         unsigned int lut_row_size;
>>         unsigned int reg_freq_lut;
>>         unsigned int reg_perf_state;
>>  };
>> 
>> -#define DEFINE_QNODE(_name, _id, _buswidth, ...)                      
>>  \
>> -       static const struct qcom_icc_node _name = {                    
>>  \
>> -               .name = #_name,                                        
>>  \
>> -               .id = _id,                                             
>>  \
>> -               .buswidth = _buswidth,                                 
>>  \
>> -               .num_links = ARRAY_SIZE(((int[]){ __VA_ARGS__ })),     
>>  \
>> -               .links = { __VA_ARGS__ },                              
>>  \
>> +#define DEFINE_QNODE(_name, _id, _buswidth, _domain, _cpu, ...)       
>>                  \
>> +       static const struct qcom_icc_node _name = {                    
>>          \
>> +               .name = #_name,                                        
>>          \
>> +               .id = _id,                                             
>>          \
>> +               .buswidth = _buswidth,                                 
>>          \
>> +               .domain = _domain,                                     
>>          \
>> +               .cpu = _cpu,                                           
>>  \
>> +               .num_links = ARRAY_SIZE(((int[]){ __VA_ARGS__ })),     
>>          \
>> +               .links = { __VA_ARGS__ },                              
>>          \
>>         }
>> 
>> -DEFINE_QNODE(sdm845_osm_apps_l3, SDM845_MASTER_OSM_L3_APPS, 16, 
>> SDM845_SLAVE_OSM_L3);
>> -DEFINE_QNODE(sdm845_osm_l3, SDM845_SLAVE_OSM_L3, 16);
>> +DEFINE_QNODE(sdm845_osm_apps_l3, SDM845_MASTER_OSM_L3_APPS, 16, 0, 0, 
>> SDM845_SLAVE_OSM_L3);
>> +DEFINE_QNODE(sdm845_osm_l3, SDM845_SLAVE_OSM_L3, 16, 0, 0);
> 
> Please avoid making these changes. Instead, have a common macro
> __DEFINE_QNODE() that takes all the arguments and then leave
> DEFINE_QNODE alone and have it pass 0 by default for the ones that are
> new and make a new define for newer SoCs like DEFINE_DOMAIN_QNODE (or a
> better name) that takes the new arguments. Then we don't have to review
> the older SoCs and figure out what changed.
> 
Thanks, will fix this in v5.
>> 
>>  static const struct qcom_icc_node *sdm845_osm_l3_nodes[] = {
>>         [MASTER_OSM_L3_APPS] = &sdm845_osm_apps_l3,
>> @@ -97,8 +121,8 @@ static const struct qcom_icc_desc sdm845_icc_osm_l3 
>> = {
>>         .reg_perf_state = OSM_REG_PERF_STATE,
>>  };
>> 
>> -DEFINE_QNODE(sc7180_osm_apps_l3, SC7180_MASTER_OSM_L3_APPS, 16, 
>> SC7180_SLAVE_OSM_L3);
>> -DEFINE_QNODE(sc7180_osm_l3, SC7180_SLAVE_OSM_L3, 16);
>> +DEFINE_QNODE(sc7180_osm_apps_l3, SC7180_MASTER_OSM_L3_APPS, 16, 0, 0, 
>> SC7180_SLAVE_OSM_L3);
>> +DEFINE_QNODE(sc7180_osm_l3, SC7180_SLAVE_OSM_L3, 16, 0, 0);
>> 
>>  static const struct qcom_icc_node *sc7180_osm_l3_nodes[] = {
>>         [MASTER_OSM_L3_APPS] = &sc7180_osm_apps_l3,
>> @@ -113,8 +137,8 @@ static const struct qcom_icc_desc 
>> sc7180_icc_osm_l3 = {
>>         .reg_perf_state = OSM_REG_PERF_STATE,
>>  };
>> 
>> -DEFINE_QNODE(sm8150_osm_apps_l3, SM8150_MASTER_OSM_L3_APPS, 32, 
>> SM8150_SLAVE_OSM_L3);
>> -DEFINE_QNODE(sm8150_osm_l3, SM8150_SLAVE_OSM_L3, 32);
>> +DEFINE_QNODE(sm8150_osm_apps_l3, SM8150_MASTER_OSM_L3_APPS, 32, 0, 0, 
>> SM8150_SLAVE_OSM_L3);
>> +DEFINE_QNODE(sm8150_osm_l3, SM8150_SLAVE_OSM_L3, 32, 0, 0);
>> 
>>  static const struct qcom_icc_node *sm8150_osm_l3_nodes[] = {
>>         [MASTER_OSM_L3_APPS] = &sm8150_osm_apps_l3,
>> @@ -129,8 +153,8 @@ static const struct qcom_icc_desc 
>> sm8150_icc_osm_l3 = {
>>         .reg_perf_state = OSM_REG_PERF_STATE,
>>  };
>> 
>> -DEFINE_QNODE(sm8250_epss_apps_l3, SM8250_MASTER_EPSS_L3_APPS, 32, 
>> SM8250_SLAVE_EPSS_L3);
>> -DEFINE_QNODE(sm8250_epss_l3, SM8250_SLAVE_EPSS_L3, 32);
>> +DEFINE_QNODE(sm8250_epss_apps_l3, SM8250_MASTER_EPSS_L3_APPS, 32, 0, 
>> 0, SM8250_SLAVE_EPSS_L3);
>> +DEFINE_QNODE(sm8250_epss_l3, SM8250_SLAVE_EPSS_L3, 32, 0, 0);
>> 
>>  static const struct qcom_icc_node *sm8250_epss_l3_nodes[] = {
>>         [MASTER_EPSS_L3_APPS] = &sm8250_epss_apps_l3,
> 
> Because it is quite a few!
> 
>> @@ -145,6 +169,39 @@ static const struct qcom_icc_desc 
>> sm8250_icc_epss_l3 = {
>>         .reg_perf_state = EPSS_REG_PERF_STATE,
>>  };
>> 
>> +DEFINE_QNODE(sc7280_epss_apps_l3, SC7280_MASTER_EPSS_L3_APPS, 32, 0, 
>> 0, SC7280_SLAVE_EPSS_L3_SHARED, SC7280_SLAVE_EPSS_L3_CPU0, 
>> SC7280_SLAVE_EPSS_L3_CPU1, SC7280_SLAVE_EPSS_L3_CPU2, 
>> SC7280_SLAVE_EPSS_L3_CPU3, SC7280_SLAVE_EPSS_L3_CPU4, 
>> SC7280_SLAVE_EPSS_L3_CPU5, SC7280_SLAVE_EPSS_L3_CPU6, 
>> SC7280_SLAVE_EPSS_L3_CPU7);
> 
> Surely this line can be split up?
> 
will fix this in v5.
>> +DEFINE_QNODE(sc7280_epss_l3_shared, SC7280_SLAVE_EPSS_L3_SHARED, 32, 
>> 0, 0);
>> +DEFINE_QNODE(sc7280_epss_l3_cpu0, SC7280_SLAVE_EPSS_L3_CPU0, 32, 1, 
>> 0);
>> +DEFINE_QNODE(sc7280_epss_l3_cpu1, SC7280_SLAVE_EPSS_L3_CPU1, 32, 1, 
>> 1);
>> +DEFINE_QNODE(sc7280_epss_l3_cpu2, SC7280_SLAVE_EPSS_L3_CPU2, 32, 1, 
>> 2);
>> +DEFINE_QNODE(sc7280_epss_l3_cpu3, SC7280_SLAVE_EPSS_L3_CPU3, 32, 1, 
>> 3);
>> +DEFINE_QNODE(sc7280_epss_l3_cpu4, SC7280_SLAVE_EPSS_L3_CPU4, 32, 2, 
>> 0);
>> +DEFINE_QNODE(sc7280_epss_l3_cpu5, SC7280_SLAVE_EPSS_L3_CPU5, 32, 2, 
>> 1);
>> +DEFINE_QNODE(sc7280_epss_l3_cpu6, SC7280_SLAVE_EPSS_L3_CPU6, 32, 2, 
>> 2);
>> +DEFINE_QNODE(sc7280_epss_l3_cpu7, SC7280_SLAVE_EPSS_L3_CPU7, 32, 3, 
>> 0);
>> +
>> +static const struct qcom_icc_node *sc7280_epss_l3_nodes[] = {
>> +       [MASTER_EPSS_L3_APPS] = &sc7280_epss_apps_l3,
>> +       [SLAVE_EPSS_L3_SHARED] = &sc7280_epss_l3_shared,
>> +       [SLAVE_EPSS_L3_CPU0] = &sc7280_epss_l3_cpu0,
>> +       [SLAVE_EPSS_L3_CPU1] = &sc7280_epss_l3_cpu1,
>> +       [SLAVE_EPSS_L3_CPU2] = &sc7280_epss_l3_cpu2,
>> +       [SLAVE_EPSS_L3_CPU3] = &sc7280_epss_l3_cpu3,
>> +       [SLAVE_EPSS_L3_CPU4] = &sc7280_epss_l3_cpu4,
>> +       [SLAVE_EPSS_L3_CPU5] = &sc7280_epss_l3_cpu5,
>> +       [SLAVE_EPSS_L3_CPU6] = &sc7280_epss_l3_cpu6,
>> +       [SLAVE_EPSS_L3_CPU7] = &sc7280_epss_l3_cpu7,
>> +};
>> +
>> +static const struct qcom_icc_desc sc7280_icc_epss_l3 = {
>> +       .nodes = sc7280_epss_l3_nodes,
>> +       .num_nodes = ARRAY_SIZE(sc7280_epss_l3_nodes),
>> +       .per_core_dcvs = true,
>> +       .lut_row_size = EPSS_LUT_ROW_SIZE,
>> +       .reg_freq_lut = EPSS_REG_FREQ_LUT,
>> +       .reg_perf_state = EPSS_REG_PERF_STATE,
>> +};
>> +
>>  static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>>  {
>>         struct qcom_osm_l3_icc_provider *qp;
>> @@ -156,13 +213,18 @@ static int qcom_icc_set(struct icc_node *src, 
>> struct icc_node *dst)
>>         u32 agg_avg = 0;
>>         u64 rate;
>> 
>> -       qn = src->data;
>> +       qn = dst->data;
>>         provider = src->provider;
>>         qp = to_qcom_provider(provider);
>> 
>> -       list_for_each_entry(n, &provider->nodes, node_list)
>> -               provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
>> -                                   &agg_avg, &agg_peak);
>> +       /* Skip aggregation when per core l3 scaling is enabled */
>> +       if (qp->per_core_dcvs) {
>> +               agg_peak = dst->peak_bw;
>> +       } else {
>> +               list_for_each_entry(n, &provider->nodes, node_list)
>> +                       provider->aggregate(n, 0, n->avg_bw, 
>> n->peak_bw,
>> +                                               &agg_avg, &agg_peak);
>> +       }
> 
> Maybe make this a function like
> 
> 	agg_peak = qcom_icc_calc_aggregate_peak();
> 
> so the indenting of the list_for_each_entry can be avoided
> 
> 
> 	if (qp->per_core_dcvs)
> 		return dst->peak_bw;
> 
> 
>         list_for_each_entry(n, &provider->nodes, node_list)
> 	      provider->aggregate(n, 0, n->avg_bw, n->peak_bw, &agg_avg,
> 	      			  &agg_peak);
> 
> 	return agg_peak;
> 
> 
> you get the idea.
> 
Thanks, will fix this in v5.

>> @@ -173,7 +235,10 @@ static int qcom_icc_set(struct icc_node *src, 
>> struct icc_node *dst)
> 
> This function name really should be different. There are other
> qcom_icc_set()s already so the tag space is cluttered.
> 
will address this in v5.
>>                         break;
>>         }
>> 
>> -       writel_relaxed(index, qp->base + qp->reg_perf_state);
>> +       if (qp->per_core_dcvs)
>> +               writel_relaxed(index, 
>> EPSS_L3_VOTE_REG(qp->domain_base[qn->domain], qn->cpu));
>> +       else
>> +               writel_relaxed(index, qp->domain_base[qn->domain] + 
>> qp->reg_perf_state);
>> 
>>         return 0;
>>  }
>> @@ -194,11 +259,12 @@ static int qcom_osm_l3_probe(struct 
>> platform_device *pdev)
>>         const struct qcom_icc_desc *desc;
>>         struct icc_onecell_data *data;
>>         struct icc_provider *provider;
>> +       struct property *prop;
>>         const struct qcom_icc_node **qnodes;
>>         struct icc_node *node;
>>         size_t num_nodes;
>>         struct clk *clk;
>> -       int ret;
>> +       int ret, index, domain_count;
>> 
>>         clk = clk_get(&pdev->dev, "xo");
>>         if (IS_ERR(clk))
>> @@ -218,12 +284,21 @@ static int qcom_osm_l3_probe(struct 
>> platform_device *pdev)
>>         if (!qp)
>>                 return -ENOMEM;
>> 
>> -       qp->base = devm_platform_ioremap_resource(pdev, 0);
>> -       if (IS_ERR(qp->base))
>> -               return PTR_ERR(qp->base);
>> +       prop = of_find_property(pdev->dev.of_node, "reg", NULL);
>> +       if (!prop)
>> +               return -EINVAL;
>> +       domain_count = prop->length / (4 * sizeof(prop->length));
>> +       if (!domain_count)
>> +               return -EINVAL;
> 
> This is counting reg properties? Most definitely this is wrong as
> #address-cells or #size-cells could be different than what this code is
> expecting. Maybe roll a loop over of_get_address() and then consider
> using that? Or just hardcode the expected number of reg properties 
> based
> on the compatible string.
> 
Thanks, will fix this in v5.
>> +
>> +       for (index = 0; index < domain_count ; index++) {
>> +               qp->domain_base[index] = 
>> devm_platform_ioremap_resource(pdev, index);
>> +               if (IS_ERR(qp->domain_base[index]))
>> +                       return PTR_ERR(qp->domain_base[index]);
>> +       }
>> 
>>         /* HW should be in enabled state to proceed */
>> -       if (!(readl_relaxed(qp->base + REG_ENABLE) & 0x1)) {
>> +       if (!(readl_relaxed(qp->domain_base[0] + REG_ENABLE) & 0x1)) {
>>                 dev_err(&pdev->dev, "error hardware not enabled\n");
>>                 return -ENODEV;
>>         }
>> @@ -235,7 +310,7 @@ static int qcom_osm_l3_probe(struct 
>> platform_device *pdev)
>>         qp->reg_perf_state = desc->reg_perf_state;
>> 
>>         for (i = 0; i < LUT_MAX_ENTRIES; i++) {
>> -               info = readl_relaxed(qp->base + desc->reg_freq_lut +
>> +               info = readl_relaxed(qp->domain_base[0] + 
>> desc->reg_freq_lut +
> 
> So is the first address a special "global" IO region that hols the LUT
> for everyone?
> 
yes
>>                                      i * desc->lut_row_size);
>>                 src = FIELD_GET(LUT_SRC, info);
>>                 lval = FIELD_GET(LUT_L_VAL, info);
>> @@ -254,6 +329,7 @@ static int qcom_osm_l3_probe(struct 
>> platform_device *pdev)
>>                 prev_freq = freq;
>>         }
>>         qp->max_state = i;
>> +       qp->per_core_dcvs = desc->per_core_dcvs;
>> 
>>         qnodes = desc->nodes;
>>         num_nodes = desc->num_nodes;
>> diff --git a/drivers/interconnect/qcom/sc7280.h 
>> b/drivers/interconnect/qcom/sc7280.h
>> index 175e400..5df7600 100644
>> --- a/drivers/interconnect/qcom/sc7280.h
>> +++ b/drivers/interconnect/qcom/sc7280.h
>> @@ -150,5 +150,15 @@
>>  #define SC7280_SLAVE_PCIE_1                    139
>>  #define SC7280_SLAVE_QDSS_STM                  140
>>  #define SC7280_SLAVE_TCU                       141
>> +#define SC7280_MASTER_EPSS_L3_APPS                     142
>> +#define SC7280_SLAVE_EPSS_L3_SHARED                    143
>> +#define SC7280_SLAVE_EPSS_L3_CPU0                      144
>> +#define SC7280_SLAVE_EPSS_L3_CPU1                      145
>> +#define SC7280_SLAVE_EPSS_L3_CPU2                      146
>> +#define SC7280_SLAVE_EPSS_L3_CPU3                      147
>> +#define SC7280_SLAVE_EPSS_L3_CPU4                      148
>> +#define SC7280_SLAVE_EPSS_L3_CPU5                      149
>> +#define SC7280_SLAVE_EPSS_L3_CPU6                      150
>> +#define SC7280_SLAVE_EPSS_L3_CPU7                      151
> 
> Can we stop using master and slave here? I know it's part of AXI
> terminology but I'm hoping they've come up with some better terms to 
> use
> now.

We will keep this for now for sc7280 as these names are already being 
used by clients. we will move to new terminology in new provider 
drivers.