[PATCH 2/3] clk: renesas: rcar-gen4: Add support for fractional multiplication

From: Geert Uytterhoeven <geert+renesas@glider.be>
To: Magnus Damm <magnus.damm@gmail.com>,
	Michael Turquette <mturquette@baylibre.com>,
	Stephen Boyd <sboyd@kernel.org>
Cc: Tho Vu <tho.vu.wh@renesas.com>,
	linux-renesas-soc@vger.kernel.org, linux-clk@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	Geert Uytterhoeven <geert+renesas@glider.be>
Subject: [PATCH 2/3] clk: renesas: rcar-gen4: Add support for fractional multiplication
Date: Thu,  8 Dec 2022 10:56:59 +0100	[thread overview]
Message-ID: <a174da512fb1cba0a001c9aed130a2adca14e60a.1670492384.git.geert+renesas@glider.be> (raw)
In-Reply-To: <cover.1670492384.git.geert+renesas@glider.be>

R-Car Gen4 PLLs support fractional multiplication, which can improve
accuracy when configuring a specific frequency.

Add support for fractional multiplication to the custom clock driver
for PLLs, which is currently used only for PLL2 on R-Car V4H.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
I am not so sure it is worth supporting this.
On R-Car V4H, the following clock rates are seen for PLL2 and the
Cortex-A76 CPU core clock, when using the Normal vs. the
High-Performance mode:

					Multiplication Mode
					Integer         Fractional
					----------	----------
    Normal (1.7 GHz):            PLL2   3399999864      3399999997
				 Z0     1699999932      1699999999
    High-Performance (1.8 GHz):  PLL2   3599999856      3599999997
				 Z0     1799999928      1799999999

The improvement is of a similar order of magnitude as the accuracy of
the external crystal, hence insignificant...

With the current implementation, fractional multiplication does not have
any impact on lower performance points, as those rely on changing the Z0
divider (which supports 32 steps only), instead of changing the PLL.
---
 drivers/clk/renesas/rcar-gen4-cpg.c | 69 ++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 15 deletions(-)

diff --git a/drivers/clk/renesas/rcar-gen4-cpg.c b/drivers/clk/renesas/rcar-gen4-cpg.c
index c68d8b987054131b..54dc3aa82b499725 100644
--- a/drivers/clk/renesas/rcar-gen4-cpg.c
+++ b/drivers/clk/renesas/rcar-gen4-cpg.c
@@ -57,10 +57,13 @@ static u32 cpg_mode __initdata;
 #define SSMODE_DITHER		BIT(1)	/* Frequency Dithering */
 #define SSMODE_CENTER		BIT(0)	/* Center (vs. Down) Spread Dithering */
 
+#define CPG_PLLxCR1_NF		GENMASK(24, 0)	/* Fractional mult. factor */
+
 /* PLL Clocks */
 struct cpg_pll_clk {
 	struct clk_hw hw;
 	void __iomem *pllcr0_reg;
+	void __iomem *pllcr1_reg;
 	void __iomem *pllecr_reg;
 	u32 pllecr_pllst_mask;
 };
@@ -71,17 +74,26 @@ static unsigned long cpg_pll_clk_recalc_rate(struct clk_hw *hw,
 					     unsigned long parent_rate)
 {
 	struct cpg_pll_clk *pll_clk = to_pll_clk(hw);
-	unsigned int mult;
-
-	mult = FIELD_GET(CPG_PLLxCR0_NI, readl(pll_clk->pllcr0_reg)) + 1;
+	u32 cr0 = readl(pll_clk->pllcr0_reg);
+	unsigned int ni, nf;
+	unsigned long rate;
+
+	ni = (FIELD_GET(CPG_PLLxCR0_NI, cr0) + 1) * 2;
+	rate = parent_rate * ni;
+	if (cr0 & CPG_PLLxCR0_SSMODE_FM) {
+		nf = FIELD_GET(CPG_PLLxCR1_NF, readl(pll_clk->pllcr1_reg));
+		rate += ((u64)parent_rate * nf) >> 24;
+	}
 
-	return parent_rate * mult * 2;
+	return rate;
 }
 
 static int cpg_pll_clk_determine_rate(struct clk_hw *hw,
 				      struct clk_rate_request *req)
 {
-	unsigned int min_mult, max_mult, mult;
+	struct cpg_pll_clk *pll_clk = to_pll_clk(hw);
+	unsigned int min_mult, max_mult, ni, nf;
+	u32 cr0 = readl(pll_clk->pllcr0_reg);
 	unsigned long prate;
 
 	prate = req->best_parent_rate * 2;
@@ -90,10 +102,23 @@ static int cpg_pll_clk_determine_rate(struct clk_hw *hw,
 	if (max_mult < min_mult)
 		return -EINVAL;
 
-	mult = DIV_ROUND_CLOSEST_ULL(req->rate, prate);
-	mult = clamp(mult, min_mult, max_mult);
+	if (cr0 & CPG_PLLxCR0_SSMODE_FM) {
+		ni = div64_ul(req->rate, prate);
+		if (ni < min_mult) {
+			ni = min_mult;
+			nf = 0;
+		} else {
+			ni = min(ni, max_mult);
+			nf = ((u64)(req->rate - prate * ni) << 24) /
+			     req->best_parent_rate;
+		}
+	} else {
+		ni = DIV_ROUND_CLOSEST_ULL(req->rate, prate);
+		ni = clamp(ni, min_mult, max_mult);
+		nf = 0;
+	}
+	req->rate = prate * ni + (((u64)req->best_parent_rate * nf) >> 24);
 
-	req->rate = prate * mult;
 	return 0;
 }
 
@@ -101,17 +126,31 @@ static int cpg_pll_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 				unsigned long parent_rate)
 {
 	struct cpg_pll_clk *pll_clk = to_pll_clk(hw);
-	unsigned int mult;
+	unsigned long prate = parent_rate * 2;
+	u32 cr0 = readl(pll_clk->pllcr0_reg);
+	unsigned int ni, nf;
+	int error;
 	u32 val;
 
-	mult = DIV_ROUND_CLOSEST_ULL(rate, parent_rate * 2);
-	mult = clamp(mult, 1U, 256U);
+	if (cr0 & CPG_PLLxCR0_SSMODE_FM) {
+		ni = div64_ul(rate, prate);
+		if (ni < 1) {
+			ni = 1;
+			nf = 0;
+		} else {
+			ni = min(ni, 256U);
+			nf = ((u64)(rate - prate * ni) << 24) / parent_rate;
+		}
+	} else {
+		ni = DIV_ROUND_CLOSEST_ULL(rate, prate);
+		ni = clamp(ni, 1U, 256U);
+	}
 
 	if (readl(pll_clk->pllcr0_reg) & CPG_PLLxCR0_KICK)
 		return -EBUSY;
 
 	cpg_reg_modify(pll_clk->pllcr0_reg, CPG_PLLxCR0_NI,
-		       FIELD_PREP(CPG_PLLxCR0_NI, mult - 1));
+		       FIELD_PREP(CPG_PLLxCR0_NI, ni - 1));
 
 	/*
 	 * Set KICK bit in PLLxCR0 to update hardware setting and wait for
@@ -161,12 +200,12 @@ static struct clk * __init cpg_pll_clk_register(const char *name,
 
 	pll_clk->hw.init = &init;
 	pll_clk->pllcr0_reg = base + cr0_offset;
+	pll_clk->pllcr1_reg = base + cr1_offset;
 	pll_clk->pllecr_reg = base + CPG_PLLECR;
 	pll_clk->pllecr_pllst_mask = CPG_PLLECR_PLLST(index);
 
-	/* Disable Fractional Multiplication and Frequency Dithering */
-	writel(0, base + cr1_offset);
-	cpg_reg_modify(pll_clk->pllcr0_reg, CPG_PLLxCR0_SSMODE, 0);
+	/* Enable Fractional Multiplication */
+	cpg_reg_modify(pll_clk->pllcr0_reg, 0, CPG_PLLxCR0_SSMODE_FM);
 
 	clk = clk_register(NULL, &pll_clk->hw);
 	if (IS_ERR(clk))
-- 
2.25.1