linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] soc: mediatek: Fix random hang up issue while kernel init
@ 2015-09-25  6:31 James Liao
  2015-09-25  8:26 ` Lucas Stach
  2015-10-06  1:57 ` Daniel Kurtz
  0 siblings, 2 replies; 14+ messages in thread
From: James Liao @ 2015-09-25  6:31 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Matthias Brugger, srv_heupstream, Kevin Hilman, Daniel Kurtz,
	Sascha Hauer, devicetree, linux-kernel, linux-mediatek,
	James Liao

In kernel late init, it turns off all unused clocks, which
needs to access subsystem registers such as VENC and VENC_LT.

Accessing MT8173 VENC registers needs two top clocks, mm_sel and
venc_sel. Accessing VENC_LT registers needs mm_sel and venclt_sel.
So we need to keep these clocks on before accessing their registers.

This patch keeps venc_sel / venclt_sel clock on when
VENC / VENC_LT's power is on, to prevent system hang up while
accessing its registeres.

Signed-off-by: James Liao <jamesjj.liao@mediatek.com>
---

This patch is based on v4.3-rc2, to fix system hanging up issue
while disabling unused clocks.

 arch/arm64/boot/dts/mediatek/mt8173.dtsi |  6 ++-
 drivers/soc/mediatek/mtk-scpsys.c        | 67 +++++++++++++++++++++-----------
 2 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index d18ee42..188917f 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -227,8 +227,10 @@
 			#power-domain-cells = <1>;
 			reg = <0 0x10006000 0 0x1000>;
 			clocks = <&clk26m>,
-				 <&topckgen CLK_TOP_MM_SEL>;
-			clock-names = "mfg", "mm";
+				 <&topckgen CLK_TOP_MM_SEL>,
+				 <&topckgen CLK_TOP_VENC_SEL>,
+				 <&topckgen CLK_TOP_VENC_LT_SEL>;
+			clock-names = "mfg", "mm", "venc", "venc_lt";
 			infracfg = <&infracfg>;
 		};
 
diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c
index 164a7d8..06032ba 100644
--- a/drivers/soc/mediatek/mtk-scpsys.c
+++ b/drivers/soc/mediatek/mtk-scpsys.c
@@ -54,12 +54,16 @@
 #define PWR_STATUS_USB			BIT(25)
 
 enum clk_id {
+	MT8173_CLK_NONE,
 	MT8173_CLK_MM,
 	MT8173_CLK_MFG,
-	MT8173_CLK_NONE,
-	MT8173_CLK_MAX = MT8173_CLK_NONE,
+	MT8173_CLK_VENC,
+	MT8173_CLK_VENC_LT,
+	MT8173_CLK_MAX,
 };
 
+#define MAX_CLKS	2
+
 struct scp_domain_data {
 	const char *name;
 	u32 sta_mask;
@@ -67,7 +71,7 @@ struct scp_domain_data {
 	u32 sram_pdn_bits;
 	u32 sram_pdn_ack_bits;
 	u32 bus_prot_mask;
-	enum clk_id clk_id;
+	enum clk_id clk_id[MAX_CLKS];
 };
 
 static const struct scp_domain_data scp_domain_data[] __initconst = {
@@ -77,7 +81,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_VDE_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = GENMASK(12, 12),
-		.clk_id = MT8173_CLK_MM,
+		.clk_id = {MT8173_CLK_MM},
 	},
 	[MT8173_POWER_DOMAIN_VENC] = {
 		.name = "venc",
@@ -85,7 +89,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_VEN_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = GENMASK(15, 12),
-		.clk_id = MT8173_CLK_MM,
+		.clk_id = {MT8173_CLK_MM, MT8173_CLK_VENC},
 	},
 	[MT8173_POWER_DOMAIN_ISP] = {
 		.name = "isp",
@@ -93,7 +97,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_ISP_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = GENMASK(13, 12),
-		.clk_id = MT8173_CLK_MM,
+		.clk_id = {MT8173_CLK_MM},
 	},
 	[MT8173_POWER_DOMAIN_MM] = {
 		.name = "mm",
@@ -101,7 +105,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_DIS_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = GENMASK(12, 12),
-		.clk_id = MT8173_CLK_MM,
+		.clk_id = {MT8173_CLK_MM},
 		.bus_prot_mask = MT8173_TOP_AXI_PROT_EN_MM_M0 |
 			MT8173_TOP_AXI_PROT_EN_MM_M1,
 	},
@@ -111,7 +115,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_VEN2_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = GENMASK(15, 12),
-		.clk_id = MT8173_CLK_MM,
+		.clk_id = {MT8173_CLK_MM, MT8173_CLK_VENC_LT},
 	},
 	[MT8173_POWER_DOMAIN_AUDIO] = {
 		.name = "audio",
@@ -119,7 +123,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_AUDIO_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = GENMASK(15, 12),
-		.clk_id = MT8173_CLK_NONE,
+		.clk_id = {MT8173_CLK_NONE},
 	},
 	[MT8173_POWER_DOMAIN_USB] = {
 		.name = "usb",
@@ -127,7 +131,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_USB_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = GENMASK(15, 12),
-		.clk_id = MT8173_CLK_NONE,
+		.clk_id = {MT8173_CLK_NONE},
 	},
 	[MT8173_POWER_DOMAIN_MFG_ASYNC] = {
 		.name = "mfg_async",
@@ -135,7 +139,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_MFG_ASYNC_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = 0,
-		.clk_id = MT8173_CLK_MFG,
+		.clk_id = {MT8173_CLK_MFG},
 	},
 	[MT8173_POWER_DOMAIN_MFG_2D] = {
 		.name = "mfg_2d",
@@ -143,7 +147,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_MFG_2D_PWR_CON,
 		.sram_pdn_bits = GENMASK(11, 8),
 		.sram_pdn_ack_bits = GENMASK(13, 12),
-		.clk_id = MT8173_CLK_NONE,
+		.clk_id = {MT8173_CLK_NONE},
 	},
 	[MT8173_POWER_DOMAIN_MFG] = {
 		.name = "mfg",
@@ -151,7 +155,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
 		.ctl_offs = SPM_MFG_PWR_CON,
 		.sram_pdn_bits = GENMASK(13, 8),
 		.sram_pdn_ack_bits = GENMASK(21, 16),
-		.clk_id = MT8173_CLK_NONE,
+		.clk_id = {MT8173_CLK_NONE},
 		.bus_prot_mask = MT8173_TOP_AXI_PROT_EN_MFG_S |
 			MT8173_TOP_AXI_PROT_EN_MFG_M0 |
 			MT8173_TOP_AXI_PROT_EN_MFG_M1 |
@@ -166,7 +170,7 @@ struct scp;
 struct scp_domain {
 	struct generic_pm_domain genpd;
 	struct scp *scp;
-	struct clk *clk;
+	struct clk *clk[MAX_CLKS];
 	u32 sta_mask;
 	void __iomem *ctl_addr;
 	u32 sram_pdn_bits;
@@ -212,11 +216,16 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
 	u32 sram_pdn_ack = scpd->sram_pdn_ack_bits;
 	u32 val;
 	int ret;
+	int i;
+
+	for (i = 0; i < MAX_CLKS && scpd->clk[i]; i++) {
+		ret = clk_prepare_enable(scpd->clk[i]);
+		if (ret) {
+			for (--i; i >= 0; i--)
+				clk_disable_unprepare(scpd->clk[i]);
 
-	if (scpd->clk) {
-		ret = clk_prepare_enable(scpd->clk);
-		if (ret)
 			goto err_clk;
+		}
 	}
 
 	val = readl(ctl_addr);
@@ -282,7 +291,10 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
 	return 0;
 
 err_pwr_ack:
-	clk_disable_unprepare(scpd->clk);
+	for (i = MAX_CLKS - 1; i >= 0; i--) {
+		if (scpd->clk[i])
+			clk_disable_unprepare(scpd->clk[i]);
+	}
 err_clk:
 	dev_err(scp->dev, "Failed to power on domain %s\n", genpd->name);
 
@@ -299,6 +311,7 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
 	u32 pdn_ack = scpd->sram_pdn_ack_bits;
 	u32 val;
 	int ret;
+	int i;
 
 	if (scpd->bus_prot_mask) {
 		ret = mtk_infracfg_set_bus_protection(scp->infracfg,
@@ -360,8 +373,8 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
 			expired = true;
 	}
 
-	if (scpd->clk)
-		clk_disable_unprepare(scpd->clk);
+	for (i = 0; i < MAX_CLKS && scpd->clk[i]; i++)
+		clk_disable_unprepare(scpd->clk[i]);
 
 	return 0;
 
@@ -375,7 +388,7 @@ static int __init scpsys_probe(struct platform_device *pdev)
 {
 	struct genpd_onecell_data *pd_data;
 	struct resource *res;
-	int i, ret;
+	int i, j, ret;
 	struct scp *scp;
 	struct clk *clk[MT8173_CLK_MAX];
 
@@ -405,6 +418,14 @@ static int __init scpsys_probe(struct platform_device *pdev)
 	if (IS_ERR(clk[MT8173_CLK_MFG]))
 		return PTR_ERR(clk[MT8173_CLK_MFG]);
 
+	clk[MT8173_CLK_VENC] = devm_clk_get(&pdev->dev, "venc");
+	if (IS_ERR(clk[MT8173_CLK_VENC]))
+		return PTR_ERR(clk[MT8173_CLK_VENC]);
+
+	clk[MT8173_CLK_VENC_LT] = devm_clk_get(&pdev->dev, "venc_lt");
+	if (IS_ERR(clk[MT8173_CLK_VENC_LT]))
+		return PTR_ERR(clk[MT8173_CLK_VENC_LT]);
+
 	scp->infracfg = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
 			"infracfg");
 	if (IS_ERR(scp->infracfg)) {
@@ -428,8 +449,8 @@ static int __init scpsys_probe(struct platform_device *pdev)
 		scpd->sram_pdn_bits = data->sram_pdn_bits;
 		scpd->sram_pdn_ack_bits = data->sram_pdn_ack_bits;
 		scpd->bus_prot_mask = data->bus_prot_mask;
-		if (data->clk_id != MT8173_CLK_NONE)
-			scpd->clk = clk[data->clk_id];
+		for (j = 0; j < MAX_CLKS && data->clk_id[j]; j++)
+			scpd->clk[j] = clk[data->clk_id[j]];
 
 		genpd->name = data->name;
 		genpd->power_off = scpsys_power_off;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-09-25  6:31 [PATCH] soc: mediatek: Fix random hang up issue while kernel init James Liao
@ 2015-09-25  8:26 ` Lucas Stach
  2015-09-27 11:25   ` Matthias Brugger
  2015-10-06  1:57 ` Daniel Kurtz
  1 sibling, 1 reply; 14+ messages in thread
From: Lucas Stach @ 2015-09-25  8:26 UTC (permalink / raw)
  To: James Liao
  Cc: linux-arm-kernel, devicetree, srv_heupstream, Kevin Hilman,
	linux-kernel, Daniel Kurtz, linux-mediatek, Sascha Hauer,
	Matthias Brugger

Am Freitag, den 25.09.2015, 14:31 +0800 schrieb James Liao:
> In kernel late init, it turns off all unused clocks, which
> needs to access subsystem registers such as VENC and VENC_LT.
> 
> Accessing MT8173 VENC registers needs two top clocks, mm_sel and
> venc_sel. Accessing VENC_LT registers needs mm_sel and venclt_sel.
> So we need to keep these clocks on before accessing their registers.
> 
> This patch keeps venc_sel / venclt_sel clock on when
> VENC / VENC_LT's power is on, to prevent system hang up while
> accessing its registeres.
> 
This changes the binding of an existing driver, so it needs some more
consideration. Are VENC_SEL and VENC_LT_SEL really direct clock inputs
to the VENC module, or are they some kind of parent clock for one of the
existing clocks used by the old VENC binding?

If they are direct clock inputs to the VENC module, changing the binding
might be still ok at that point, as there shouldn't be many users of
that yet. But then we at least need a corresponding change to the
binding documentation.

Regards,
Lucas

> Signed-off-by: James Liao <jamesjj.liao@mediatek.com>
> ---
> 
> This patch is based on v4.3-rc2, to fix system hanging up issue
> while disabling unused clocks.
> 
>  arch/arm64/boot/dts/mediatek/mt8173.dtsi |  6 ++-
>  drivers/soc/mediatek/mtk-scpsys.c        | 67 +++++++++++++++++++++-----------
>  2 files changed, 48 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
> index d18ee42..188917f 100644
> --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
> +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
> @@ -227,8 +227,10 @@
>  			#power-domain-cells = <1>;
>  			reg = <0 0x10006000 0 0x1000>;
>  			clocks = <&clk26m>,
> -				 <&topckgen CLK_TOP_MM_SEL>;
> -			clock-names = "mfg", "mm";
> +				 <&topckgen CLK_TOP_MM_SEL>,
> +				 <&topckgen CLK_TOP_VENC_SEL>,
> +				 <&topckgen CLK_TOP_VENC_LT_SEL>;
> +			clock-names = "mfg", "mm", "venc", "venc_lt";
>  			infracfg = <&infracfg>;
>  		};
>  
> diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c
> index 164a7d8..06032ba 100644
> --- a/drivers/soc/mediatek/mtk-scpsys.c
> +++ b/drivers/soc/mediatek/mtk-scpsys.c
> @@ -54,12 +54,16 @@
>  #define PWR_STATUS_USB			BIT(25)
>  
>  enum clk_id {
> +	MT8173_CLK_NONE,
>  	MT8173_CLK_MM,
>  	MT8173_CLK_MFG,
> -	MT8173_CLK_NONE,
> -	MT8173_CLK_MAX = MT8173_CLK_NONE,
> +	MT8173_CLK_VENC,
> +	MT8173_CLK_VENC_LT,
> +	MT8173_CLK_MAX,
>  };
>  
> +#define MAX_CLKS	2
> +
>  struct scp_domain_data {
>  	const char *name;
>  	u32 sta_mask;
> @@ -67,7 +71,7 @@ struct scp_domain_data {
>  	u32 sram_pdn_bits;
>  	u32 sram_pdn_ack_bits;
>  	u32 bus_prot_mask;
> -	enum clk_id clk_id;
> +	enum clk_id clk_id[MAX_CLKS];
>  };
>  
>  static const struct scp_domain_data scp_domain_data[] __initconst = {
> @@ -77,7 +81,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_VDE_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = GENMASK(12, 12),
> -		.clk_id = MT8173_CLK_MM,
> +		.clk_id = {MT8173_CLK_MM},
>  	},
>  	[MT8173_POWER_DOMAIN_VENC] = {
>  		.name = "venc",
> @@ -85,7 +89,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_VEN_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = GENMASK(15, 12),
> -		.clk_id = MT8173_CLK_MM,
> +		.clk_id = {MT8173_CLK_MM, MT8173_CLK_VENC},
>  	},
>  	[MT8173_POWER_DOMAIN_ISP] = {
>  		.name = "isp",
> @@ -93,7 +97,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_ISP_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = GENMASK(13, 12),
> -		.clk_id = MT8173_CLK_MM,
> +		.clk_id = {MT8173_CLK_MM},
>  	},
>  	[MT8173_POWER_DOMAIN_MM] = {
>  		.name = "mm",
> @@ -101,7 +105,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_DIS_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = GENMASK(12, 12),
> -		.clk_id = MT8173_CLK_MM,
> +		.clk_id = {MT8173_CLK_MM},
>  		.bus_prot_mask = MT8173_TOP_AXI_PROT_EN_MM_M0 |
>  			MT8173_TOP_AXI_PROT_EN_MM_M1,
>  	},
> @@ -111,7 +115,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_VEN2_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = GENMASK(15, 12),
> -		.clk_id = MT8173_CLK_MM,
> +		.clk_id = {MT8173_CLK_MM, MT8173_CLK_VENC_LT},
>  	},
>  	[MT8173_POWER_DOMAIN_AUDIO] = {
>  		.name = "audio",
> @@ -119,7 +123,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_AUDIO_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = GENMASK(15, 12),
> -		.clk_id = MT8173_CLK_NONE,
> +		.clk_id = {MT8173_CLK_NONE},
>  	},
>  	[MT8173_POWER_DOMAIN_USB] = {
>  		.name = "usb",
> @@ -127,7 +131,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_USB_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = GENMASK(15, 12),
> -		.clk_id = MT8173_CLK_NONE,
> +		.clk_id = {MT8173_CLK_NONE},
>  	},
>  	[MT8173_POWER_DOMAIN_MFG_ASYNC] = {
>  		.name = "mfg_async",
> @@ -135,7 +139,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_MFG_ASYNC_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = 0,
> -		.clk_id = MT8173_CLK_MFG,
> +		.clk_id = {MT8173_CLK_MFG},
>  	},
>  	[MT8173_POWER_DOMAIN_MFG_2D] = {
>  		.name = "mfg_2d",
> @@ -143,7 +147,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_MFG_2D_PWR_CON,
>  		.sram_pdn_bits = GENMASK(11, 8),
>  		.sram_pdn_ack_bits = GENMASK(13, 12),
> -		.clk_id = MT8173_CLK_NONE,
> +		.clk_id = {MT8173_CLK_NONE},
>  	},
>  	[MT8173_POWER_DOMAIN_MFG] = {
>  		.name = "mfg",
> @@ -151,7 +155,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>  		.ctl_offs = SPM_MFG_PWR_CON,
>  		.sram_pdn_bits = GENMASK(13, 8),
>  		.sram_pdn_ack_bits = GENMASK(21, 16),
> -		.clk_id = MT8173_CLK_NONE,
> +		.clk_id = {MT8173_CLK_NONE},
>  		.bus_prot_mask = MT8173_TOP_AXI_PROT_EN_MFG_S |
>  			MT8173_TOP_AXI_PROT_EN_MFG_M0 |
>  			MT8173_TOP_AXI_PROT_EN_MFG_M1 |
> @@ -166,7 +170,7 @@ struct scp;
>  struct scp_domain {
>  	struct generic_pm_domain genpd;
>  	struct scp *scp;
> -	struct clk *clk;
> +	struct clk *clk[MAX_CLKS];
>  	u32 sta_mask;
>  	void __iomem *ctl_addr;
>  	u32 sram_pdn_bits;
> @@ -212,11 +216,16 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
>  	u32 sram_pdn_ack = scpd->sram_pdn_ack_bits;
>  	u32 val;
>  	int ret;
> +	int i;
> +
> +	for (i = 0; i < MAX_CLKS && scpd->clk[i]; i++) {
> +		ret = clk_prepare_enable(scpd->clk[i]);
> +		if (ret) {
> +			for (--i; i >= 0; i--)
> +				clk_disable_unprepare(scpd->clk[i]);
>  
> -	if (scpd->clk) {
> -		ret = clk_prepare_enable(scpd->clk);
> -		if (ret)
>  			goto err_clk;
> +		}
>  	}
>  
>  	val = readl(ctl_addr);
> @@ -282,7 +291,10 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
>  	return 0;
>  
>  err_pwr_ack:
> -	clk_disable_unprepare(scpd->clk);
> +	for (i = MAX_CLKS - 1; i >= 0; i--) {
> +		if (scpd->clk[i])
> +			clk_disable_unprepare(scpd->clk[i]);
> +	}
>  err_clk:
>  	dev_err(scp->dev, "Failed to power on domain %s\n", genpd->name);
>  
> @@ -299,6 +311,7 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
>  	u32 pdn_ack = scpd->sram_pdn_ack_bits;
>  	u32 val;
>  	int ret;
> +	int i;
>  
>  	if (scpd->bus_prot_mask) {
>  		ret = mtk_infracfg_set_bus_protection(scp->infracfg,
> @@ -360,8 +373,8 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
>  			expired = true;
>  	}
>  
> -	if (scpd->clk)
> -		clk_disable_unprepare(scpd->clk);
> +	for (i = 0; i < MAX_CLKS && scpd->clk[i]; i++)
> +		clk_disable_unprepare(scpd->clk[i]);
>  
>  	return 0;
>  
> @@ -375,7 +388,7 @@ static int __init scpsys_probe(struct platform_device *pdev)
>  {
>  	struct genpd_onecell_data *pd_data;
>  	struct resource *res;
> -	int i, ret;
> +	int i, j, ret;
>  	struct scp *scp;
>  	struct clk *clk[MT8173_CLK_MAX];
>  
> @@ -405,6 +418,14 @@ static int __init scpsys_probe(struct platform_device *pdev)
>  	if (IS_ERR(clk[MT8173_CLK_MFG]))
>  		return PTR_ERR(clk[MT8173_CLK_MFG]);
>  
> +	clk[MT8173_CLK_VENC] = devm_clk_get(&pdev->dev, "venc");
> +	if (IS_ERR(clk[MT8173_CLK_VENC]))
> +		return PTR_ERR(clk[MT8173_CLK_VENC]);
> +
> +	clk[MT8173_CLK_VENC_LT] = devm_clk_get(&pdev->dev, "venc_lt");
> +	if (IS_ERR(clk[MT8173_CLK_VENC_LT]))
> +		return PTR_ERR(clk[MT8173_CLK_VENC_LT]);
> +
>  	scp->infracfg = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
>  			"infracfg");
>  	if (IS_ERR(scp->infracfg)) {
> @@ -428,8 +449,8 @@ static int __init scpsys_probe(struct platform_device *pdev)
>  		scpd->sram_pdn_bits = data->sram_pdn_bits;
>  		scpd->sram_pdn_ack_bits = data->sram_pdn_ack_bits;
>  		scpd->bus_prot_mask = data->bus_prot_mask;
> -		if (data->clk_id != MT8173_CLK_NONE)
> -			scpd->clk = clk[data->clk_id];
> +		for (j = 0; j < MAX_CLKS && data->clk_id[j]; j++)
> +			scpd->clk[j] = clk[data->clk_id[j]];
>  
>  		genpd->name = data->name;
>  		genpd->power_off = scpsys_power_off;

-- 
Pengutronix e.K.             | Lucas Stach                 |
Industrial Linux Solutions   | http://www.pengutronix.de/  |


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-09-25  8:26 ` Lucas Stach
@ 2015-09-27 11:25   ` Matthias Brugger
  2015-09-29 10:25     ` Daniel Kurtz
  0 siblings, 1 reply; 14+ messages in thread
From: Matthias Brugger @ 2015-09-27 11:25 UTC (permalink / raw)
  To: Lucas Stach, James Liao
  Cc: linux-arm-kernel, devicetree, srv_heupstream, Kevin Hilman,
	linux-kernel, Daniel Kurtz, linux-mediatek, Sascha Hauer



On 25/09/15 10:26, Lucas Stach wrote:
> Am Freitag, den 25.09.2015, 14:31 +0800 schrieb James Liao:
>> In kernel late init, it turns off all unused clocks, which
>> needs to access subsystem registers such as VENC and VENC_LT.
>>
>> Accessing MT8173 VENC registers needs two top clocks, mm_sel and
>> venc_sel. Accessing VENC_LT registers needs mm_sel and venclt_sel.
>> So we need to keep these clocks on before accessing their registers.
>>
>> This patch keeps venc_sel / venclt_sel clock on when
>> VENC / VENC_LT's power is on, to prevent system hang up while
>> accessing its registeres.
>>
> This changes the binding of an existing driver, so it needs some more
> consideration. Are VENC_SEL and VENC_LT_SEL really direct clock inputs
> to the VENC module, or are they some kind of parent clock for one of the
> existing clocks used by the old VENC binding?
>
> If they are direct clock inputs to the VENC module, changing the binding
> might be still ok at that point, as there shouldn't be many users of
> that yet. But then we at least need a corresponding change to the
> binding documentation.
>

Yes, we will need a really good reason to change the bindings. Apart 
from that we would need to find a solution which works with the old (and 
wrong bindings) as well.

Apart from that, please send the dtsi part as a seperate patch.

Thanks,
Matthias

> Regards,
> Lucas
>
>> Signed-off-by: James Liao <jamesjj.liao@mediatek.com>
>> ---
>>
>> This patch is based on v4.3-rc2, to fix system hanging up issue
>> while disabling unused clocks.
>>
>>   arch/arm64/boot/dts/mediatek/mt8173.dtsi |  6 ++-
>>   drivers/soc/mediatek/mtk-scpsys.c        | 67 +++++++++++++++++++++-----------
>>   2 files changed, 48 insertions(+), 25 deletions(-)
>>
>> diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
>> index d18ee42..188917f 100644
>> --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
>> +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
>> @@ -227,8 +227,10 @@
>>   			#power-domain-cells = <1>;
>>   			reg = <0 0x10006000 0 0x1000>;
>>   			clocks = <&clk26m>,
>> -				 <&topckgen CLK_TOP_MM_SEL>;
>> -			clock-names = "mfg", "mm";
>> +				 <&topckgen CLK_TOP_MM_SEL>,
>> +				 <&topckgen CLK_TOP_VENC_SEL>,
>> +				 <&topckgen CLK_TOP_VENC_LT_SEL>;
>> +			clock-names = "mfg", "mm", "venc", "venc_lt";
>>   			infracfg = <&infracfg>;
>>   		};
>>
>> diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c
>> index 164a7d8..06032ba 100644
>> --- a/drivers/soc/mediatek/mtk-scpsys.c
>> +++ b/drivers/soc/mediatek/mtk-scpsys.c
>> @@ -54,12 +54,16 @@
>>   #define PWR_STATUS_USB			BIT(25)
>>
>>   enum clk_id {
>> +	MT8173_CLK_NONE,
>>   	MT8173_CLK_MM,
>>   	MT8173_CLK_MFG,
>> -	MT8173_CLK_NONE,
>> -	MT8173_CLK_MAX = MT8173_CLK_NONE,
>> +	MT8173_CLK_VENC,
>> +	MT8173_CLK_VENC_LT,
>> +	MT8173_CLK_MAX,
>>   };
>>
>> +#define MAX_CLKS	2
>> +
>>   struct scp_domain_data {
>>   	const char *name;
>>   	u32 sta_mask;
>> @@ -67,7 +71,7 @@ struct scp_domain_data {
>>   	u32 sram_pdn_bits;
>>   	u32 sram_pdn_ack_bits;
>>   	u32 bus_prot_mask;
>> -	enum clk_id clk_id;
>> +	enum clk_id clk_id[MAX_CLKS];
>>   };
>>
>>   static const struct scp_domain_data scp_domain_data[] __initconst = {
>> @@ -77,7 +81,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_VDE_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = GENMASK(12, 12),
>> -		.clk_id = MT8173_CLK_MM,
>> +		.clk_id = {MT8173_CLK_MM},
>>   	},
>>   	[MT8173_POWER_DOMAIN_VENC] = {
>>   		.name = "venc",
>> @@ -85,7 +89,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_VEN_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = GENMASK(15, 12),
>> -		.clk_id = MT8173_CLK_MM,
>> +		.clk_id = {MT8173_CLK_MM, MT8173_CLK_VENC},
>>   	},
>>   	[MT8173_POWER_DOMAIN_ISP] = {
>>   		.name = "isp",
>> @@ -93,7 +97,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_ISP_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = GENMASK(13, 12),
>> -		.clk_id = MT8173_CLK_MM,
>> +		.clk_id = {MT8173_CLK_MM},
>>   	},
>>   	[MT8173_POWER_DOMAIN_MM] = {
>>   		.name = "mm",
>> @@ -101,7 +105,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_DIS_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = GENMASK(12, 12),
>> -		.clk_id = MT8173_CLK_MM,
>> +		.clk_id = {MT8173_CLK_MM},
>>   		.bus_prot_mask = MT8173_TOP_AXI_PROT_EN_MM_M0 |
>>   			MT8173_TOP_AXI_PROT_EN_MM_M1,
>>   	},
>> @@ -111,7 +115,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_VEN2_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = GENMASK(15, 12),
>> -		.clk_id = MT8173_CLK_MM,
>> +		.clk_id = {MT8173_CLK_MM, MT8173_CLK_VENC_LT},
>>   	},
>>   	[MT8173_POWER_DOMAIN_AUDIO] = {
>>   		.name = "audio",
>> @@ -119,7 +123,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_AUDIO_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = GENMASK(15, 12),
>> -		.clk_id = MT8173_CLK_NONE,
>> +		.clk_id = {MT8173_CLK_NONE},
>>   	},
>>   	[MT8173_POWER_DOMAIN_USB] = {
>>   		.name = "usb",
>> @@ -127,7 +131,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_USB_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = GENMASK(15, 12),
>> -		.clk_id = MT8173_CLK_NONE,
>> +		.clk_id = {MT8173_CLK_NONE},
>>   	},
>>   	[MT8173_POWER_DOMAIN_MFG_ASYNC] = {
>>   		.name = "mfg_async",
>> @@ -135,7 +139,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_MFG_ASYNC_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = 0,
>> -		.clk_id = MT8173_CLK_MFG,
>> +		.clk_id = {MT8173_CLK_MFG},
>>   	},
>>   	[MT8173_POWER_DOMAIN_MFG_2D] = {
>>   		.name = "mfg_2d",
>> @@ -143,7 +147,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_MFG_2D_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(11, 8),
>>   		.sram_pdn_ack_bits = GENMASK(13, 12),
>> -		.clk_id = MT8173_CLK_NONE,
>> +		.clk_id = {MT8173_CLK_NONE},
>>   	},
>>   	[MT8173_POWER_DOMAIN_MFG] = {
>>   		.name = "mfg",
>> @@ -151,7 +155,7 @@ static const struct scp_domain_data scp_domain_data[] __initconst = {
>>   		.ctl_offs = SPM_MFG_PWR_CON,
>>   		.sram_pdn_bits = GENMASK(13, 8),
>>   		.sram_pdn_ack_bits = GENMASK(21, 16),
>> -		.clk_id = MT8173_CLK_NONE,
>> +		.clk_id = {MT8173_CLK_NONE},
>>   		.bus_prot_mask = MT8173_TOP_AXI_PROT_EN_MFG_S |
>>   			MT8173_TOP_AXI_PROT_EN_MFG_M0 |
>>   			MT8173_TOP_AXI_PROT_EN_MFG_M1 |
>> @@ -166,7 +170,7 @@ struct scp;
>>   struct scp_domain {
>>   	struct generic_pm_domain genpd;
>>   	struct scp *scp;
>> -	struct clk *clk;
>> +	struct clk *clk[MAX_CLKS];
>>   	u32 sta_mask;
>>   	void __iomem *ctl_addr;
>>   	u32 sram_pdn_bits;
>> @@ -212,11 +216,16 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
>>   	u32 sram_pdn_ack = scpd->sram_pdn_ack_bits;
>>   	u32 val;
>>   	int ret;
>> +	int i;
>> +
>> +	for (i = 0; i < MAX_CLKS && scpd->clk[i]; i++) {
>> +		ret = clk_prepare_enable(scpd->clk[i]);
>> +		if (ret) {
>> +			for (--i; i >= 0; i--)
>> +				clk_disable_unprepare(scpd->clk[i]);
>>
>> -	if (scpd->clk) {
>> -		ret = clk_prepare_enable(scpd->clk);
>> -		if (ret)
>>   			goto err_clk;
>> +		}
>>   	}
>>
>>   	val = readl(ctl_addr);
>> @@ -282,7 +291,10 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
>>   	return 0;
>>
>>   err_pwr_ack:
>> -	clk_disable_unprepare(scpd->clk);
>> +	for (i = MAX_CLKS - 1; i >= 0; i--) {
>> +		if (scpd->clk[i])
>> +			clk_disable_unprepare(scpd->clk[i]);
>> +	}
>>   err_clk:
>>   	dev_err(scp->dev, "Failed to power on domain %s\n", genpd->name);
>>
>> @@ -299,6 +311,7 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
>>   	u32 pdn_ack = scpd->sram_pdn_ack_bits;
>>   	u32 val;
>>   	int ret;
>> +	int i;
>>
>>   	if (scpd->bus_prot_mask) {
>>   		ret = mtk_infracfg_set_bus_protection(scp->infracfg,
>> @@ -360,8 +373,8 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
>>   			expired = true;
>>   	}
>>
>> -	if (scpd->clk)
>> -		clk_disable_unprepare(scpd->clk);
>> +	for (i = 0; i < MAX_CLKS && scpd->clk[i]; i++)
>> +		clk_disable_unprepare(scpd->clk[i]);
>>
>>   	return 0;
>>
>> @@ -375,7 +388,7 @@ static int __init scpsys_probe(struct platform_device *pdev)
>>   {
>>   	struct genpd_onecell_data *pd_data;
>>   	struct resource *res;
>> -	int i, ret;
>> +	int i, j, ret;
>>   	struct scp *scp;
>>   	struct clk *clk[MT8173_CLK_MAX];
>>
>> @@ -405,6 +418,14 @@ static int __init scpsys_probe(struct platform_device *pdev)
>>   	if (IS_ERR(clk[MT8173_CLK_MFG]))
>>   		return PTR_ERR(clk[MT8173_CLK_MFG]);
>>
>> +	clk[MT8173_CLK_VENC] = devm_clk_get(&pdev->dev, "venc");
>> +	if (IS_ERR(clk[MT8173_CLK_VENC]))
>> +		return PTR_ERR(clk[MT8173_CLK_VENC]);
>> +
>> +	clk[MT8173_CLK_VENC_LT] = devm_clk_get(&pdev->dev, "venc_lt");
>> +	if (IS_ERR(clk[MT8173_CLK_VENC_LT]))
>> +		return PTR_ERR(clk[MT8173_CLK_VENC_LT]);
>> +
>>   	scp->infracfg = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
>>   			"infracfg");
>>   	if (IS_ERR(scp->infracfg)) {
>> @@ -428,8 +449,8 @@ static int __init scpsys_probe(struct platform_device *pdev)
>>   		scpd->sram_pdn_bits = data->sram_pdn_bits;
>>   		scpd->sram_pdn_ack_bits = data->sram_pdn_ack_bits;
>>   		scpd->bus_prot_mask = data->bus_prot_mask;
>> -		if (data->clk_id != MT8173_CLK_NONE)
>> -			scpd->clk = clk[data->clk_id];
>> +		for (j = 0; j < MAX_CLKS && data->clk_id[j]; j++)
>> +			scpd->clk[j] = clk[data->clk_id[j]];
>>
>>   		genpd->name = data->name;
>>   		genpd->power_off = scpsys_power_off;
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-09-27 11:25   ` Matthias Brugger
@ 2015-09-29 10:25     ` Daniel Kurtz
  2015-09-30  9:07       ` Lucas Stach
  2015-10-01  6:58       ` James Liao
  0 siblings, 2 replies; 14+ messages in thread
From: Daniel Kurtz @ 2015-09-29 10:25 UTC (permalink / raw)
  To: Matthias Brugger, Lucas Stach, Sascha Hauer
  Cc: James Liao, linux-arm-kernel, open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek

Hi Matthias & Lucas,

On Sun, Sep 27, 2015 at 7:25 PM, Matthias Brugger
<matthias.bgg@gmail.com> wrote:
>
>
>
> On 25/09/15 10:26, Lucas Stach wrote:
>>
>> Am Freitag, den 25.09.2015, 14:31 +0800 schrieb James Liao:
>>>
>>> In kernel late init, it turns off all unused clocks, which
>>> needs to access subsystem registers such as VENC and VENC_LT.
>>>
>>> Accessing MT8173 VENC registers needs two top clocks, mm_sel and
>>> venc_sel. Accessing VENC_LT registers needs mm_sel and venclt_sel.
>>> So we need to keep these clocks on before accessing their registers.
>>>
>>> This patch keeps venc_sel / venclt_sel clock on when
>>> VENC / VENC_LT's power is on, to prevent system hang up while
>>> accessing its registeres.
>>>
>> This changes the binding of an existing driver, so it needs some more
>> consideration. Are VENC_SEL and VENC_LT_SEL really direct clock inputs
>> to the VENC module, or are they some kind of parent clock for one of the
>> existing clocks used by the old VENC binding?
>>
>> If they are direct clock inputs to the VENC module, changing the binding
>> might be still ok at that point, as there shouldn't be many users of
>> that yet.

The MT8173 VENC subsystem clock registers are modeled by the vencsys
clock-controller node:

vencsys: clock-controller@18000000 {
  compatible = "mediatek,mt8173-vencsys", "syscon";
  reg = <0 0x18000000 0 0x1000>;
  #clock-cells = <1>;
};

These registers, however, are only accessible if:
  (a) the VENC power domain is enabled, and
  (b) the "venc_sel" clock is enabled.

According to James, venc_sel is a direct clock input the VENC module,
and it is an ancestor to some, but not all, of the subsystem clocks, in
particular it is not an ancestor to "venc_cke0":

 static const struct mtk_gate venc_clks[] __initconst = {
         GATE_VENC(CLK_VENC_CKE0, "venc_cke0", "mm_sel", 0),
         GATE_VENC(CLK_VENC_CKE1, "venc_cke1", "venc_sel", 4),
         GATE_VENC(CLK_VENC_CKE2, "venc_cke2", "venc_sel", 8),
         GATE_VENC(CLK_VENC_CKE3, "venc_cke3", "venc_sel", 12),
 };

If the VENC power domain is disabled, then accesses to the vencsys
registers just silently fail (i.e., reads probably
return all 0s).  However, if the power domain is ON but venc_sel is
disabled, then accessing the clock-controller registers results in a
system hang (until the power domain is disabled).

At boot the VENC power domain is forced on by scpsys (like all other
power domains).  Later both unused clocks and power domains get
disabled. If clocks are disabled first, and venc_sel is disabled
before venc_cke0, then when unused_clock tries to disable venc_cke0,
the system will hang when it reads the vencsys "is clock enabled" bit
since:
  (a) VENC power domain is still enabled, and
  (b) venc_sel is already disabled

In theory, we could add "clocks=<&topckgen CLK_TOP_VENC_SEL>;" to the
vencsys clock-controller node.  On initialization, mtk_vencsys_init()
could then pass venc_sel to the generic MT8173 gate clock driver, and
it would then clk_enable(venc_sel)/_disable(venc_sel) around any
access to the clock-controller registers.  James, however, thinks
this is a lot of extra overhead, and instead has proposed the fix in this patch,
where venc_sel is forced on whenever the VENC power domain is enabled.

So, this patch is a bit of a hack, but the mtk scpsys driver already
does something similar for the MM & MFG clocks - these clocks are
always enabled whenever certain power domains are enabled, and they
are only disabled when all such power domains are disabled.  I'm not
100% why these clocks must always be kept on whenever these power
domains are enabled, but probably to solve a similar problem where
these clocks are needed to access some registers at a time when these
clocks would not otherwise be explicitly enabled.

Is the above clear?

>> But then we at least need a corresponding change to the binding documentation.

Agreed.

> Yes, we will need a really good reason to change the bindings.

There is a race where the system can sometimes hang at boot with the
existing implementation & bindings.
This seems like a really good reason to change the bindings, if the
proposed solution is acceptable (it may be possible to fix the hang
without changing the bindings).

> Apart from that we would need to find a solution which works with the old (and wrong bindings) as well.

Why do we need to support the old bindings?
This patch fixes a problem in MT8173, for which the mainline kernel is
still under active bringup.
If the existing bindings are not used anywhere yet (*), it seems like
unnecessary overhead to enforce backwards compatibility at this stage.

(*) I don't actually know if this is true, perhaps only Mediatek can
answer this.

> Apart from that, please send the dtsi part as a seperate patch.

Agreed.

Thanks,
-Dan

>
> Thanks,
> Matthias
>
>
>> Regards,
>> Lucas

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-09-29 10:25     ` Daniel Kurtz
@ 2015-09-30  9:07       ` Lucas Stach
  2015-10-01  7:15         ` James Liao
  2015-10-01  6:58       ` James Liao
  1 sibling, 1 reply; 14+ messages in thread
From: Lucas Stach @ 2015-09-30  9:07 UTC (permalink / raw)
  To: Daniel Kurtz
  Cc: Matthias Brugger, Sascha Hauer, James Liao, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek

Hi Daniel,

Am Dienstag, den 29.09.2015, 18:25 +0800 schrieb Daniel Kurtz:
> Hi Matthias & Lucas,
> 
> On Sun, Sep 27, 2015 at 7:25 PM, Matthias Brugger
> <matthias.bgg@gmail.com> wrote:
> >
> >
> >
> > On 25/09/15 10:26, Lucas Stach wrote:
> >>
> >> Am Freitag, den 25.09.2015, 14:31 +0800 schrieb James Liao:
> >>>
> >>> In kernel late init, it turns off all unused clocks, which
> >>> needs to access subsystem registers such as VENC and VENC_LT.
> >>>
> >>> Accessing MT8173 VENC registers needs two top clocks, mm_sel and
> >>> venc_sel. Accessing VENC_LT registers needs mm_sel and venclt_sel.
> >>> So we need to keep these clocks on before accessing their registers.
> >>>
> >>> This patch keeps venc_sel / venclt_sel clock on when
> >>> VENC / VENC_LT's power is on, to prevent system hang up while
> >>> accessing its registeres.
> >>>
> >> This changes the binding of an existing driver, so it needs some more
> >> consideration. Are VENC_SEL and VENC_LT_SEL really direct clock inputs
> >> to the VENC module, or are they some kind of parent clock for one of the
> >> existing clocks used by the old VENC binding?
> >>
> >> If they are direct clock inputs to the VENC module, changing the binding
> >> might be still ok at that point, as there shouldn't be many users of
> >> that yet.
> 
> The MT8173 VENC subsystem clock registers are modeled by the vencsys
> clock-controller node:
> 
> vencsys: clock-controller@18000000 {
>   compatible = "mediatek,mt8173-vencsys", "syscon";
>   reg = <0 0x18000000 0 0x1000>;
>   #clock-cells = <1>;
> };
> 
> These registers, however, are only accessible if:
>   (a) the VENC power domain is enabled, and
>   (b) the "venc_sel" clock is enabled.
> 
> According to James, venc_sel is a direct clock input the VENC module,
> and it is an ancestor to some, but not all, of the subsystem clocks, in
> particular it is not an ancestor to "venc_cke0":
> 
>  static const struct mtk_gate venc_clks[] __initconst = {
>          GATE_VENC(CLK_VENC_CKE0, "venc_cke0", "mm_sel", 0),
>          GATE_VENC(CLK_VENC_CKE1, "venc_cke1", "venc_sel", 4),
>          GATE_VENC(CLK_VENC_CKE2, "venc_cke2", "venc_sel", 8),
>          GATE_VENC(CLK_VENC_CKE3, "venc_cke3", "venc_sel", 12),
>  };
> 
> If the VENC power domain is disabled, then accesses to the vencsys
> registers just silently fail (i.e., reads probably
> return all 0s). 

If this ever happens it is really unfortunate and needs fixing, too. If
you need the power domain to be powered ON to properly read or even
change the clock registers, the clock driver needs to be a consumer of
the power domain, so any clock operations powers the domain up.

> However, if the power domain is ON but venc_sel is
> disabled, then accessing the clock-controller registers results in a
> system hang (until the power domain is disabled).
> 
> At boot the VENC power domain is forced on by scpsys (like all other
> power domains).  Later both unused clocks and power domains get
> disabled. If clocks are disabled first, and venc_sel is disabled
> before venc_cke0, then when unused_clock tries to disable venc_cke0,
> the system will hang when it reads the vencsys "is clock enabled" bit
> since:
>   (a) VENC power domain is still enabled, and
>   (b) venc_sel is already disabled
> 
> In theory, we could add "clocks=<&topckgen CLK_TOP_VENC_SEL>;" to the
> vencsys clock-controller node.  On initialization, mtk_vencsys_init()
> could then pass venc_sel to the generic MT8173 gate clock driver, and
> it would then clk_enable(venc_sel)/_disable(venc_sel) around any
> access to the clock-controller registers.  James, however, thinks
> this is a lot of extra overhead, and instead has proposed the fix in this patch,
> where venc_sel is forced on whenever the VENC power domain is enabled.
> 
I would still say this is the correct solution. If the vencsys clock
registers are itself clocked by VENC_SEL this driver needs to make sure
this clock is running at the appropriate times. I understand that this
may be a bit of an overhead, but clock enable/disable paths are not
really performance critical.

> So, this patch is a bit of a hack, but the mtk scpsys driver already
> does something similar for the MM & MFG clocks - these clocks are
> always enabled whenever certain power domains are enabled, and they
> are only disabled when all such power domains are disabled.  I'm not
> 100% why these clocks must always be kept on whenever these power
> domains are enabled, but probably to solve a similar problem where
> these clocks are needed to access some registers at a time when these
> clocks would not otherwise be explicitly enabled.
> 
I can't really argue with this being the wrong solution if we already
have precedent of the same solution being used for other domains. But I
would still ask you to re-evaluate with the above in mind.

> Is the above clear?
> 
> >> But then we at least need a corresponding change to the binding documentation.
> 
> Agreed.
> 
> > Yes, we will need a really good reason to change the bindings.
> 
> There is a race where the system can sometimes hang at boot with the
> existing implementation & bindings.
> This seems like a really good reason to change the bindings, if the
> proposed solution is acceptable (it may be possible to fix the hang
> without changing the bindings).
> 
> > Apart from that we would need to find a solution which works with the old (and wrong bindings) as well.
> 
> Why do we need to support the old bindings?
> This patch fixes a problem in MT8173, for which the mainline kernel is
> still under active bringup.
> If the existing bindings are not used anywhere yet (*), it seems like
> unnecessary overhead to enforce backwards compatibility at this stage.
> 
I tend to agree. DT backwards compatibility isn't easily done (and is a
constant burden, as it gets harder to test over time) and if the SoC is
in early bringup one might just admit that the first iteration of the
bindings were wrong and need an incompatible change.

But that's not up to me to decide, but something Matthias needs to make
a decision on.

Regards,
Lucas
-- 
Pengutronix e.K.             | Lucas Stach                 |
Industrial Linux Solutions   | http://www.pengutronix.de/  |


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-09-29 10:25     ` Daniel Kurtz
  2015-09-30  9:07       ` Lucas Stach
@ 2015-10-01  6:58       ` James Liao
  1 sibling, 0 replies; 14+ messages in thread
From: James Liao @ 2015-10-01  6:58 UTC (permalink / raw)
  To: Daniel Kurtz
  Cc: Matthias Brugger, Lucas Stach, Sascha Hauer, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek

Hi Daniel,

Thanks for your help to explain the patch.

On Tue, 2015-09-29 at 18:25 +0800, Daniel Kurtz wrote:
> On Sun, Sep 27, 2015 at 7:25 PM, Matthias Brugger
> <matthias.bgg@gmail.com> wrote:
> >> But then we at least need a corresponding change to the binding documentation.

I'll send a new patch with changing binding document.

> If the existing bindings are not used anywhere yet (*), it seems like
> unnecessary overhead to enforce backwards compatibility at this stage.
> 
> (*) I don't actually know if this is true, perhaps only Mediatek can
> answer this.

What does the meaning of existing bindings are used anywhere? Do you
mean this binding is used by other SoCs ?

Currently scpsys driver can't be shared between different Mediatek SoCs
because the power domains are different from each other. So I think it
should no need to maintain backward compatibility on scpsys's binding.

> > Apart from that, please send the dtsi part as a seperate patch.

OK.


Best regards,

James


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-09-30  9:07       ` Lucas Stach
@ 2015-10-01  7:15         ` James Liao
  2015-10-01  8:07           ` Daniel Kurtz
  0 siblings, 1 reply; 14+ messages in thread
From: James Liao @ 2015-10-01  7:15 UTC (permalink / raw)
  To: Lucas Stach
  Cc: Daniel Kurtz, Matthias Brugger, Sascha Hauer, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek

Hi Lucas,

On Wed, 2015-09-30 at 11:07 +0200, Lucas Stach wrote:
> > If the VENC power domain is disabled, then accesses to the vencsys
> > registers just silently fail (i.e., reads probably
> > return all 0s). 
> 
> If this ever happens it is really unfortunate and needs fixing, too. If
> you need the power domain to be powered ON to properly read or even
> change the clock registers, the clock driver needs to be a consumer of
> the power domain, so any clock operations powers the domain up.

A subsystem should be powered on before its clock operations. But I
think this flow should be guaranteed by VENC driver. This patch is
focused on the race between disabling unused clocks and power domains by
frameworks. 

> > In theory, we could add "clocks=<&topckgen CLK_TOP_VENC_SEL>;" to the
> > vencsys clock-controller node.  On initialization, mtk_vencsys_init()
> > could then pass venc_sel to the generic MT8173 gate clock driver, and
> > it would then clk_enable(venc_sel)/_disable(venc_sel) around any
> > access to the clock-controller registers.  James, however, thinks
> > this is a lot of extra overhead, and instead has proposed the fix in this patch,
> > where venc_sel is forced on whenever the VENC power domain is enabled.
> > 
> I would still say this is the correct solution. If the vencsys clock
> registers are itself clocked by VENC_SEL this driver needs to make sure
> this clock is running at the appropriate times. I understand that this
> may be a bit of an overhead, but clock enable/disable paths are not
> really performance critical.
> 
> > So, this patch is a bit of a hack, but the mtk scpsys driver already
> > does something similar for the MM & MFG clocks - these clocks are
> > always enabled whenever certain power domains are enabled, and they
> > are only disabled when all such power domains are disabled.  I'm not
> > 100% why these clocks must always be kept on whenever these power
> > domains are enabled, but probably to solve a similar problem where
> > these clocks are needed to access some registers at a time when these
> > clocks would not otherwise be explicitly enabled.
> > 
> I can't really argue with this being the wrong solution if we already
> have precedent of the same solution being used for other domains. But I
> would still ask you to re-evaluate with the above in mind.

One cause of the hang up issue is frameworks' behavior. Power domain
framework and clock framework work independently during kernel init. So
their control flow can't be guaranteed by a suitable driver, such as
VENC driver.

I preferred to keep venc_sel on during VENC power on because I'm not
sure there is any other framework may control VENC's registers during
kernel init.


Best regards,

James



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-10-01  7:15         ` James Liao
@ 2015-10-01  8:07           ` Daniel Kurtz
  2015-10-01  9:26             ` James Liao
  0 siblings, 1 reply; 14+ messages in thread
From: Daniel Kurtz @ 2015-10-01  8:07 UTC (permalink / raw)
  To: James Liao
  Cc: Lucas Stach, Matthias Brugger, Sascha Hauer, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek

Hi James,

On Thu, Oct 1, 2015 at 3:15 PM, James Liao <jamesjj.liao@mediatek.com> wrote:
> Hi Lucas,
>
> On Wed, 2015-09-30 at 11:07 +0200, Lucas Stach wrote:
>> > If the VENC power domain is disabled, then accesses to the vencsys
>> > registers just silently fail (i.e., reads probably
>> > return all 0s).
>>
>> If this ever happens it is really unfortunate and needs fixing, too. If
>> you need the power domain to be powered ON to properly read or even
>> change the clock registers, the clock driver needs to be a consumer of
>> the power domain, so any clock operations powers the domain up.
>
> A subsystem should be powered on before its clock operations. But I
> think this flow should be guaranteed by VENC driver. This patch is
> focused on the race between disabling unused clocks and power domains by
> frameworks.
>
>> > In theory, we could add "clocks=<&topckgen CLK_TOP_VENC_SEL>;" to the
>> > vencsys clock-controller node.  On initialization, mtk_vencsys_init()
>> > could then pass venc_sel to the generic MT8173 gate clock driver, and
>> > it would then clk_enable(venc_sel)/_disable(venc_sel) around any
>> > access to the clock-controller registers.  James, however, thinks
>> > this is a lot of extra overhead, and instead has proposed the fix in this patch,
>> > where venc_sel is forced on whenever the VENC power domain is enabled.
>> >
>> I would still say this is the correct solution. If the vencsys clock
>> registers are itself clocked by VENC_SEL this driver needs to make sure
>> this clock is running at the appropriate times. I understand that this
>> may be a bit of an overhead, but clock enable/disable paths are not
>> really performance critical.
>>
>> > So, this patch is a bit of a hack, but the mtk scpsys driver already
>> > does something similar for the MM & MFG clocks - these clocks are
>> > always enabled whenever certain power domains are enabled, and they
>> > are only disabled when all such power domains are disabled.  I'm not
>> > 100% why these clocks must always be kept on whenever these power
>> > domains are enabled, but probably to solve a similar problem where
>> > these clocks are needed to access some registers at a time when these
>> > clocks would not otherwise be explicitly enabled.
>> >
>> I can't really argue with this being the wrong solution if we already
>> have precedent of the same solution being used for other domains. But I
>> would still ask you to re-evaluate with the above in mind.
>
> One cause of the hang up issue is frameworks' behavior. Power domain
> framework and clock framework work independently during kernel init. So
> their control flow can't be guaranteed by a suitable driver, such as
> VENC driver.

Hmm... below is my current understanding.

In the current software architecture, we have split the "venc"
subsystem register space into two parts (two dt nodes), with each node
handled instantiated as its own  platform device, and handled by a
different platform driver:

vencsys: clock-controller@18000000 {
  compatible = "mediatek,mt8173-vencsys", "syscon";
  reg = <0 0x18000000 0 0x1000>;
  #clock-cells = <1>;
};

/* TBD - a venc driver has not been posted to the list so I don't
really know yet what it will look like */
 venc: encoder@~18000000 {
   compatible = "mediatek,mt8173-venc?";
   reg = <0 ~0x18000000 0 ?>;
   clocks = <& <&vencsys CLK_VENC_CKE1>, <&vencsys CLK_VENC_CKE0>, ...;
   clock-names = "apb", "smi", ...;
   ...
 };

Each independent driver must take appropriate independent action to
ensure that any clock required to access its associated registers is
enabled when accessing said registers.
In other words,
 * the venc *clock-controller* driver that must enable any clocks
required to access the *clock control* registers
 * the venc *encoder* driver must enable clocks any clocks required to
access the *encoder* registers

Actually, the situation is a little worse that this, since we also
have a 'larb' node in the same register space, whose driver must also
ensure that VENC_SEL is enabled before accessing its registers:

larb3: larb@18001000 {
  compatible = "mediatek,mt8173-smi-larb";
  reg = <0 0x18001000 0 0x1000>;
  mediatek,smi = <&smi_common>;
  power-domains = <&scpsys MT8173_POWER_DOMAIN_VENC>;
  clocks = <&vencsys CLK_VENC_CKE1>, <&vencsys CLK_VENC_CKE0>;
  clock-names = "apb", "smi";
};

Personally, I am not opposed to using the power-domain enable/disable
as a proxy for also enabling/disabling the "subsystem register bank"
clock as is done in this patch and by the scpsys driver in general.
However, it feels a bit like an abuse of the power domain api.

> I preferred to keep venc_sel on during VENC power on because I'm not
> sure there is any other framework may control VENC's registers during
> kernel init.

The misunderstanding here is the interpretation of "VENC's registers".
In this case, the registers in question are those owned by the
"vencsys: clock-controller@18000000" node, and the driver accessing
them is drivers/clk/mediatek/clk-gate.c.

Best,
-Dan

>
>
> Best regards,
>
> James
>
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-10-01  8:07           ` Daniel Kurtz
@ 2015-10-01  9:26             ` James Liao
  2015-10-01 10:08               ` Daniel Kurtz
  0 siblings, 1 reply; 14+ messages in thread
From: James Liao @ 2015-10-01  9:26 UTC (permalink / raw)
  To: Daniel Kurtz
  Cc: Lucas Stach, Matthias Brugger, Sascha Hauer, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek

Hi Daniel,

On Thu, 2015-10-01 at 16:07 +0800, Daniel Kurtz wrote:
> Hmm... below is my current understanding.
> 
> In the current software architecture, we have split the "venc"
> subsystem register space into two parts (two dt nodes), with each node
> handled instantiated as its own  platform device, and handled by a
> different platform driver:
> 
> vencsys: clock-controller@18000000 {
>   compatible = "mediatek,mt8173-vencsys", "syscon";
>   reg = <0 0x18000000 0 0x1000>;
>   #clock-cells = <1>;
> };
> 
> /* TBD - a venc driver has not been posted to the list so I don't
> really know yet what it will look like */
>  venc: encoder@~18000000 {
>    compatible = "mediatek,mt8173-venc?";
>    reg = <0 ~0x18000000 0 ?>;
>    clocks = <& <&vencsys CLK_VENC_CKE1>, <&vencsys CLK_VENC_CKE0>, ...;
>    clock-names = "apb", "smi", ...;
>    ...
>  };
> 
> Each independent driver must take appropriate independent action to
> ensure that any clock required to access its associated registers is
> enabled when accessing said registers.
> In other words,
>  * the venc *clock-controller* driver that must enable any clocks
> required to access the *clock control* registers
>  * the venc *encoder* driver must enable clocks any clocks required to
> access the *encoder* registers

> Actually, the situation is a little worse that this, since we also
> have a 'larb' node in the same register space, whose driver must also
> ensure that VENC_SEL is enabled before accessing its registers:
> 
> larb3: larb@18001000 {
>   compatible = "mediatek,mt8173-smi-larb";
>   reg = <0 0x18001000 0 0x1000>;
>   mediatek,smi = <&smi_common>;
>   power-domains = <&scpsys MT8173_POWER_DOMAIN_VENC>;
>   clocks = <&vencsys CLK_VENC_CKE1>, <&vencsys CLK_VENC_CKE0>;
>   clock-names = "apb", "smi";
> };
> 
> Personally, I am not opposed to using the power-domain enable/disable
> as a proxy for also enabling/disabling the "subsystem register bank"
> clock as is done in this patch and by the scpsys driver in general.
> However, it feels a bit like an abuse of the power domain api.
> 
> > I preferred to keep venc_sel on during VENC power on because I'm not
> > sure there is any other framework may control VENC's registers during
> > kernel init.
> 
> The misunderstanding here is the interpretation of "VENC's registers".
> In this case, the registers in question are those owned by the
> "vencsys: clock-controller@18000000" node, and the driver accessing
> them is drivers/clk/mediatek/clk-gate.c.

The situation is more complex than you mentioned above. VENC (0x18000000
~ 0x1800ffff) for example, if we want to avoid hanging up while
accessing registers, we need to:
 
 - Prevent venc_sel off while VENC is powered on.
 - Prevent mm_sel off while MM is powered on.

First, it's not worth to control power domains in clock control path
because it's too expensive. So we want to keep clock on before accessing
registers or during the domain is powered on.

Besides, modeling a clock controller as a consumer of power domain may
not a good idea, because there are power domains be consumers of clocks,
such as:

	[MT8173_POWER_DOMAIN_MM] = {
		.name = "mm",
		[...]
		.clk_id = MT8173_CLK_MM,
		[...]
	},

Second, if we want to avoid hanging up by enabling related top clocks in
clock controllers, we need to clock on venc_sel and mm_sel before *each
clock operations*, and then clock off them after clock operations. That
means:

- To check venc_cke0's state:
 = clk_prepare mm_sel and venc_sel (where to prepare?)
  - It may turn on related PLLs, and it can't be invoked in an atomic
context.
 = clk_enable mm_sel and venc_sel.
 = Read VENC's clock register.
 = clk_disable mm_sel and venc_sel.
 = clk_unprepare mm_sel and venc_sel (where to unprepare?)
  - It may turn off PLLs, and it can't be invoked in an atomic context.
 = Return venc_cke0's state.

Overhead is a reason. The clock framework's API flow (prepare - enable -
disable - unprepare) is another reason to reject the solution in clock
controllers, because prepare/unprepare is a non-atomic operation but
operations to access registers may be a atomic context.



Best regards,

James




^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-10-01  9:26             ` James Liao
@ 2015-10-01 10:08               ` Daniel Kurtz
  2015-10-02  3:00                 ` James Liao
  0 siblings, 1 reply; 14+ messages in thread
From: Daniel Kurtz @ 2015-10-01 10:08 UTC (permalink / raw)
  To: James Liao
  Cc: Lucas Stach, Matthias Brugger, Sascha Hauer, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek,
	linux-clk, Mike Turquette, Stephen Boyd, linux-pm, rjw

+linux-clk +linux-pm and the genpd and clock gurus

Hi James,

On Thu, Oct 1, 2015 at 5:26 PM, James Liao <jamesjj.liao@mediatek.com> wrote:
> Hi Daniel,
>
> On Thu, 2015-10-01 at 16:07 +0800, Daniel Kurtz wrote:
>> Hmm... below is my current understanding.
>>
>> In the current software architecture, we have split the "venc"
>> subsystem register space into two parts (two dt nodes), with each node
>> handled instantiated as its own  platform device, and handled by a
>> different platform driver:
>>
>> vencsys: clock-controller@18000000 {
>>   compatible = "mediatek,mt8173-vencsys", "syscon";
>>   reg = <0 0x18000000 0 0x1000>;
>>   #clock-cells = <1>;
>> };
>>
>> /* TBD - a venc driver has not been posted to the list so I don't
>> really know yet what it will look like */
>>  venc: encoder@~18000000 {
>>    compatible = "mediatek,mt8173-venc?";
>>    reg = <0 ~0x18000000 0 ?>;
>>    clocks = <& <&vencsys CLK_VENC_CKE1>, <&vencsys CLK_VENC_CKE0>, ...;
>>    clock-names = "apb", "smi", ...;
>>    ...
>>  };
>>
>> Each independent driver must take appropriate independent action to
>> ensure that any clock required to access its associated registers is
>> enabled when accessing said registers.
>> In other words,
>>  * the venc *clock-controller* driver that must enable any clocks
>> required to access the *clock control* registers
>>  * the venc *encoder* driver must enable clocks any clocks required to
>> access the *encoder* registers
>
>> Actually, the situation is a little worse that this, since we also
>> have a 'larb' node in the same register space, whose driver must also
>> ensure that VENC_SEL is enabled before accessing its registers:
>>
>> larb3: larb@18001000 {
>>   compatible = "mediatek,mt8173-smi-larb";
>>   reg = <0 0x18001000 0 0x1000>;
>>   mediatek,smi = <&smi_common>;
>>   power-domains = <&scpsys MT8173_POWER_DOMAIN_VENC>;
>>   clocks = <&vencsys CLK_VENC_CKE1>, <&vencsys CLK_VENC_CKE0>;
>>   clock-names = "apb", "smi";
>> };
>>
>> Personally, I am not opposed to using the power-domain enable/disable
>> as a proxy for also enabling/disabling the "subsystem register bank"
>> clock as is done in this patch and by the scpsys driver in general.
>> However, it feels a bit like an abuse of the power domain api.
>>
>> > I preferred to keep venc_sel on during VENC power on because I'm not
>> > sure there is any other framework may control VENC's registers during
>> > kernel init.
>>
>> The misunderstanding here is the interpretation of "VENC's registers".
>> In this case, the registers in question are those owned by the
>> "vencsys: clock-controller@18000000" node, and the driver accessing
>> them is drivers/clk/mediatek/clk-gate.c.
>
> The situation is more complex than you mentioned above. VENC (0x18000000
> ~ 0x1800ffff) for example, if we want to avoid hanging up while
> accessing registers, we need to:
>
>  - Prevent venc_sel off while VENC is powered on.
>  - Prevent mm_sel off while MM is powered on.
>
> First, it's not worth to control power domains in clock control path
> because it's too expensive. So we want to keep clock on before accessing
> registers or during the domain is powered on.
>
> Besides, modeling a clock controller as a consumer of power domain may
> not a good idea, because there are power domains be consumers of clocks,
> such as:
>
>         [MT8173_POWER_DOMAIN_MM] = {
>                 .name = "mm",
>                 [...]
>                 .clk_id = MT8173_CLK_MM,
>                 [...]
>         },

I see two cases where "a power domain is a consumer of a clock":
  (a) the clock is needed to access the power domain control
registers.  The clock must actually be in another power domain, since
otherwise you could never turn *on* the power domain in question.
  (b) the clock is needed to talk to the power domain that is about to
turn off, or that was just turned on - these clocks are usually used
to control some kind of bus arbitration, etc.  In this case, the
clocks are only accessed when the power domain is on.

I would argue that in both cases, the clock should in theory should
only be enabled/disabled by the power on/off routine for the duration
of time that is needed, and that it should not be "left enabled" by
the power domain on/off function...  I can see how this might be a
useful optimization - but it also seems like a way to burn extra power
by leaving on a clock that is not necessarily being used.

But - perhaps I am over thinking this, and it is standard practice to
bundle power domains with the clocks that service components in the
power domain.

> Second, if we want to avoid hanging up by enabling related top clocks in
> clock controllers, we need to clock on venc_sel and mm_sel before *each
> clock operations*, and then clock off them after clock operations. That
> means:
>
> - To check venc_cke0's state:
>  = clk_prepare mm_sel and venc_sel (where to prepare?)
>   - It may turn on related PLLs, and it can't be invoked in an atomic
> context.
>  = clk_enable mm_sel and venc_sel.
>  = Read VENC's clock register.
>  = clk_disable mm_sel and venc_sel.
>  = clk_unprepare mm_sel and venc_sel (where to unprepare?)
>   - It may turn off PLLs, and it can't be invoked in an atomic context.
>  = Return venc_cke0's state.
>
> Overhead is a reason. The clock framework's API flow (prepare - enable -
> disable - unprepare) is another reason to reject the solution in clock
> controllers, because prepare/unprepare is a non-atomic operation but
> operations to access registers may be a atomic context.

Or, alternatively, just prepare venc_sel once during init, for each
clock controller that needs it, when configuring the clock controller
node (for example, in mtk_vencsys_init()). This is similar to how we
set up 'critical clocks'.
By just preparing the clock, you tell the common clock core:
 "my clock controller will need this clock later; please make sure
that it can be enabled/disabled later during atomic context."

Thanks!
-Dan

>
>
>
> Best regards,
>
> James
>
>
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-10-01 10:08               ` Daniel Kurtz
@ 2015-10-02  3:00                 ` James Liao
  2015-10-02  9:25                   ` Daniel Kurtz
  0 siblings, 1 reply; 14+ messages in thread
From: James Liao @ 2015-10-02  3:00 UTC (permalink / raw)
  To: Daniel Kurtz
  Cc: Lucas Stach, Matthias Brugger, Sascha Hauer, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek,
	linux-clk, Mike Turquette, Stephen Boyd, linux-pm, rjw

Hi Daniel,

On Thu, 2015-10-01 at 18:08 +0800, Daniel Kurtz wrote:
> I see two cases where "a power domain is a consumer of a clock":
>   (a) the clock is needed to access the power domain control
> registers.  The clock must actually be in another power domain, since
> otherwise you could never turn *on* the power domain in question.
>   (b) the clock is needed to talk to the power domain that is about to
> turn off, or that was just turned on - these clocks are usually used
> to control some kind of bus arbitration, etc.  In this case, the
> clocks are only accessed when the power domain is on.
> 
> I would argue that in both cases, the clock should in theory should
> only be enabled/disabled by the power on/off routine for the duration
> of time that is needed, and that it should not be "left enabled" by
> the power domain on/off function...  I can see how this might be a
> useful optimization - but it also seems like a way to burn extra power
> by leaving on a clock that is not necessarily being used.
> 
> But - perhaps I am over thinking this, and it is standard practice to
> bundle power domains with the clocks that service components in the
> power domain.

Yes, you are right. Power domain on/off operations need clocks to get
status of the subsystem. Keeping clocks on during power domain on is an
optimization.

But to model a clock controller as a power domain consumer, or say we
need to control power domain before clock on/off, is not a good practice
because we always consider clock operations should be light weight.
Power domains should be controlled explicitly by subsystem drivers,
instead of hiding behind clock controls.

> Or, alternatively, just prepare venc_sel once during init, for each
> clock controller that needs it, when configuring the clock controller
> node (for example, in mtk_vencsys_init()). This is similar to how we
> set up 'critical clocks'.
> By just preparing the clock, you tell the common clock core:
>  "my clock controller will need this clock later; please make sure
> that it can be enabled/disabled later during atomic context."

In current implementation, PLLs will be turned on in clk_prepare(). So
if a clock is always prepared, its parent PLL will be kept on even if
the clock is not enabled. It consumes extra power, so we don't prefer
this kind of solution.


Best regards,

James



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-10-02  3:00                 ` James Liao
@ 2015-10-02  9:25                   ` Daniel Kurtz
  2015-10-02 10:37                     ` James Liao
  0 siblings, 1 reply; 14+ messages in thread
From: Daniel Kurtz @ 2015-10-02  9:25 UTC (permalink / raw)
  To: James Liao
  Cc: Lucas Stach, Matthias Brugger, Sascha Hauer, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek,
	linux-clk, Mike Turquette, Stephen Boyd, linux-pm, rjw

On Fri, Oct 2, 2015 at 11:00 AM, James Liao <jamesjj.liao@mediatek.com> wrote:
> Hi Daniel,
>
> On Thu, 2015-10-01 at 18:08 +0800, Daniel Kurtz wrote:
>> I see two cases where "a power domain is a consumer of a clock":
>>   (a) the clock is needed to access the power domain control
>> registers.  The clock must actually be in another power domain, since
>> otherwise you could never turn *on* the power domain in question.
>>   (b) the clock is needed to talk to the power domain that is about to
>> turn off, or that was just turned on - these clocks are usually used
>> to control some kind of bus arbitration, etc.  In this case, the
>> clocks are only accessed when the power domain is on.
>>
>> I would argue that in both cases, the clock should in theory should
>> only be enabled/disabled by the power on/off routine for the duration
>> of time that is needed, and that it should not be "left enabled" by
>> the power domain on/off function...  I can see how this might be a
>> useful optimization - but it also seems like a way to burn extra power
>> by leaving on a clock that is not necessarily being used.
>>
>> But - perhaps I am over thinking this, and it is standard practice to
>> bundle power domains with the clocks that service components in the
>> power domain.
>
> Yes, you are right. Power domain on/off operations need clocks to get
> status of the subsystem. Keeping clocks on during power domain on is an
> optimization.
>
> But to model a clock controller as a power domain consumer, or say we
> need to control power domain before clock on/off, is not a good practice
> because we always consider clock operations should be light weight.
> Power domains should be controlled explicitly by subsystem drivers,
> instead of hiding behind clock controls.
>
>> Or, alternatively, just prepare venc_sel once during init, for each
>> clock controller that needs it, when configuring the clock controller
>> node (for example, in mtk_vencsys_init()). This is similar to how we
>> set up 'critical clocks'.
>> By just preparing the clock, you tell the common clock core:
>>  "my clock controller will need this clock later; please make sure
>> that it can be enabled/disabled later during atomic context."
>
> In current implementation, PLLs will be turned on in clk_prepare(). So
> if a clock is always prepared, its parent PLL will be kept on even if
> the clock is not enabled. It consumes extra power, so we don't prefer
> this kind of solution.

Actually, I should have proposed adding prepare / unprepare callbacks
to mtk_clk_gate_ops in which we prepare_enable/disable_unprepare
venc_sel (& mm_sel).
This should correctly track all of the clk reference counting needed
to access the venc clock control registers.


However, this would not actually solve the issue that this patch is
trying to fix, since nobody would have called the
prepare_clk(venc_cke0) before clk_disable_unused_subtree(venc_cke0).
Bummer.

I think the crux of the original issue is that the mediatek/clk-gate.c
is_enabled() callbacks may be called at a point in time when venc_sel
is disabled but VENC PD is enabled.
So, perhaps an easier, but still a little hacky way out is to keep the
clk & PD problems separate, and just check __clk_is_enabled(venc_sel)
before reading the venc clk status register.

WDYT?

By the way, I'm not really opposed to your current implementation
either, I was just helping Matthias & Lucas understand in enough
detail that they can help review.

-Dan

>
>
> Best regards,
>
> James
>
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-10-02  9:25                   ` Daniel Kurtz
@ 2015-10-02 10:37                     ` James Liao
  0 siblings, 0 replies; 14+ messages in thread
From: James Liao @ 2015-10-02 10:37 UTC (permalink / raw)
  To: Daniel Kurtz
  Cc: Lucas Stach, Matthias Brugger, Sascha Hauer, linux-arm-kernel,
	open list:OPEN FIRMWARE AND...,
	srv_heupstream, Kevin Hilman, linux-kernel, linux-mediatek,
	linux-clk, Mike Turquette, Stephen Boyd, linux-pm, rjw

HI Daniel,

On Fri, 2015-10-02 at 17:25 +0800, Daniel Kurtz wrote:
> Actually, I should have proposed adding prepare / unprepare callbacks
> to mtk_clk_gate_ops in which we prepare_enable/disable_unprepare
> venc_sel (& mm_sel).
> This should correctly track all of the clk reference counting needed
> to access the venc clock control registers.
> 
> 
> However, this would not actually solve the issue that this patch is
> trying to fix, since nobody would have called the
> prepare_clk(venc_cke0) before clk_disable_unused_subtree(venc_cke0).
> Bummer.

Hmm... it's true.

> I think the crux of the original issue is that the mediatek/clk-gate.c
> is_enabled() callbacks may be called at a point in time when venc_sel
> is disabled but VENC PD is enabled.
> So, perhaps an easier, but still a little hacky way out is to keep the
> clk & PD problems separate, and just check __clk_is_enabled(venc_sel)
> before reading the venc clk status register.
> 
> WDYT?

This may be a way to resolve this issue, but model the prerequisite
clock of subsystem clocks is not straightforward. And once we model the
prerequisite clocks, someone may suggest us to enable prerequisite
clocks before operate subsystem clocks, which may have concerns on
overhead and complexity.

> By the way, I'm not really opposed to your current implementation
> either, I was just helping Matthias & Lucas understand in enough
> detail that they can help review.

I know and many thanks for your suggestion on this patch.


Best regards,

James


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] soc: mediatek: Fix random hang up issue while kernel init
  2015-09-25  6:31 [PATCH] soc: mediatek: Fix random hang up issue while kernel init James Liao
  2015-09-25  8:26 ` Lucas Stach
@ 2015-10-06  1:57 ` Daniel Kurtz
  1 sibling, 0 replies; 14+ messages in thread
From: Daniel Kurtz @ 2015-10-06  1:57 UTC (permalink / raw)
  To: James Liao
  Cc: linux-arm-kernel, Matthias Brugger, srv_heupstream, Kevin Hilman,
	Sascha Hauer, open list:OPEN FIRMWARE AND...,
	linux-kernel, linux-mediatek

On Fri, Sep 25, 2015 at 2:31 PM, James Liao <jamesjj.liao@mediatek.com> wrote:
> In kernel late init, it turns off all unused clocks, which
> needs to access subsystem registers such as VENC and VENC_LT.
>
> Accessing MT8173 VENC registers needs two top clocks, mm_sel and
> venc_sel. Accessing VENC_LT registers needs mm_sel and venclt_sel.
> So we need to keep these clocks on before accessing their registers.
>
> This patch keeps venc_sel / venclt_sel clock on when
> VENC / VENC_LT's power is on, to prevent system hang up while
> accessing its registeres.
>
> Signed-off-by: James Liao <jamesjj.liao@mediatek.com>

After the discussions with James here on the list, I'm happy with this fix.
Compared to any proposed alternative, this patch seems just as
effective and is also simpler and more maintainable and extensible (if
other clocks/power domains need the same treatment).

Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-10-06  1:58 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-25  6:31 [PATCH] soc: mediatek: Fix random hang up issue while kernel init James Liao
2015-09-25  8:26 ` Lucas Stach
2015-09-27 11:25   ` Matthias Brugger
2015-09-29 10:25     ` Daniel Kurtz
2015-09-30  9:07       ` Lucas Stach
2015-10-01  7:15         ` James Liao
2015-10-01  8:07           ` Daniel Kurtz
2015-10-01  9:26             ` James Liao
2015-10-01 10:08               ` Daniel Kurtz
2015-10-02  3:00                 ` James Liao
2015-10-02  9:25                   ` Daniel Kurtz
2015-10-02 10:37                     ` James Liao
2015-10-01  6:58       ` James Liao
2015-10-06  1:57 ` Daniel Kurtz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).