All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-05-26  4:14 ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-05-26  4:14 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson, Sai Prakash Ranjan

TLB sync timeouts can be due to various reasons such as TBU power down
or pending TCU/TBU invalidation/sync and so on. Debugging these often
require dumping of some implementation defined registers to know the
status of TBU/TCU operations and some of these registers are not
accessible in non-secure world such as from kernel and requires SMC
calls to read them in the secure world. So, add this debug support
to dump implementation defined registers for TLB sync timeout issues.

Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
---

Changes in v2:
 * Use scm call consistently so that it works on older chipsets where
   some of these regs are secure registers.
 * Add device specific data to get the implementation defined register
   offsets.

---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
 drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
 drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
 3 files changed, 146 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 7820711c4560..bb68aa85b28b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -5,13 +5,27 @@
 
 #include <linux/acpi.h>
 #include <linux/adreno-smmu-priv.h>
+#include <linux/delay.h>
 #include <linux/of_device.h>
 #include <linux/qcom_scm.h>
 
 #include "arm-smmu.h"
 
+#define QCOM_DUMMY_VAL	-1
+
+enum qcom_smmu_impl_reg_offset {
+	QCOM_SMMU_TBU_PWR_STATUS,
+	QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
+	QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
+};
+
+struct qcom_smmu_config {
+	const u32 *reg_offset;
+};
+
 struct qcom_smmu {
 	struct arm_smmu_device smmu;
+	const struct qcom_smmu_config *cfg;
 	bool bypass_quirk;
 	u8 bypass_cbndx;
 	u32 stall_enabled;
@@ -22,6 +36,56 @@ static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
 	return container_of(smmu, struct qcom_smmu, smmu);
 }
 
+static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
+				int sync, int status)
+{
+	int ret;
+	unsigned int spin_cnt, delay;
+	u32 reg, tbu_pwr_status, sync_inv_ack, sync_inv_progress;
+	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+	const struct qcom_smmu_config *cfg;
+
+	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
+	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+			reg = arm_smmu_readl(smmu, page, status);
+			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
+				return;
+			cpu_relax();
+		}
+		udelay(delay);
+	}
+
+	dev_err_ratelimited(smmu->dev,
+			    "TLB sync timed out -- SMMU may be deadlocked\n");
+
+	cfg = qsmmu->cfg;
+	if (!cfg)
+		return;
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
+				&tbu_pwr_status);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TBU power status: %d\n", ret);
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
+				&sync_inv_ack);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TBU sync/inv ack status: %d\n", ret);
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
+				&sync_inv_progress);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TCU syn/inv progress: %d\n", ret);
+
+	dev_err_ratelimited(smmu->dev,
+			    "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
+			    tbu_pwr_status, sync_inv_ack, sync_inv_progress);
+}
+
 static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
 		u32 reg)
 {
@@ -374,6 +438,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
 	.def_domain_type = qcom_smmu_def_domain_type,
 	.reset = qcom_smmu500_reset,
 	.write_s2cr = qcom_smmu_write_s2cr,
+	.tlb_sync = qcom_smmu_tlb_sync,
 };
 
 static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
@@ -382,12 +447,84 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
 	.reset = qcom_smmu500_reset,
 	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
 	.write_sctlr = qcom_adreno_smmu_write_sctlr,
+	.tlb_sync = qcom_smmu_tlb_sync,
+};
+
+/* Implementation Defined Register Space 0 register offsets */
+static const u32 qcom_smmu_impl0_reg_offset[] = {
+	[QCOM_SMMU_TBU_PWR_STATUS]		= 0x2204,
+	[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]	= 0x25dc,
+	[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]	= 0x2670,
+};
+
+static const struct qcom_smmu_config qcm2290_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc7180_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc7280_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc8180x_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm6125_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm6350_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8150_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8250_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8350_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8450_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
+	{ .compatible = "qcom,msm8998-smmu-v2" },
+	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
+	{ .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
+	{ .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
+	{ .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
+	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
+	{ .compatible = "qcom,sdm630-smmu-v2" },
+	{ .compatible = "qcom,sdm845-smmu-500" },
+	{ .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
+	{ .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
+	{ .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
+	{ .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
+	{ .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
+	{ .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
+	{ }
 };
 
 static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 		const struct arm_smmu_impl *impl)
 {
 	struct qcom_smmu *qsmmu;
+	const struct of_device_id *match;
+	const struct device_node *np = smmu->dev->of_node;
 
 	/* Check to make sure qcom_scm has finished probing */
 	if (!qcom_scm_is_available())
@@ -398,28 +535,16 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 		return ERR_PTR(-ENOMEM);
 
 	qsmmu->smmu.impl = impl;
+	match = of_match_node(qcom_smmu_impl_of_match, np);
+	if (!match)
+		goto out;
+
+	qsmmu->cfg = match->data;
 
+out:
 	return &qsmmu->smmu;
 }
 
-static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
-	{ .compatible = "qcom,msm8998-smmu-v2" },
-	{ .compatible = "qcom,qcm2290-smmu-500" },
-	{ .compatible = "qcom,sc7180-smmu-500" },
-	{ .compatible = "qcom,sc7280-smmu-500" },
-	{ .compatible = "qcom,sc8180x-smmu-500" },
-	{ .compatible = "qcom,sc8280xp-smmu-500" },
-	{ .compatible = "qcom,sdm630-smmu-v2" },
-	{ .compatible = "qcom,sdm845-smmu-500" },
-	{ .compatible = "qcom,sm6125-smmu-500" },
-	{ .compatible = "qcom,sm6350-smmu-500" },
-	{ .compatible = "qcom,sm8150-smmu-500" },
-	{ .compatible = "qcom,sm8250-smmu-500" },
-	{ .compatible = "qcom,sm8350-smmu-500" },
-	{ .compatible = "qcom,sm8450-smmu-500" },
-	{ }
-};
-
 #ifdef CONFIG_ACPI
 static struct acpi_platform_list qcom_acpi_platlist[] = {
 	{ "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 2ed3594f384e..4c5b51109835 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -2099,6 +2099,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	if (IS_ERR(smmu->base))
 		return PTR_ERR(smmu->base);
 	ioaddr = res->start;
+	smmu->ioaddr = ioaddr;
+
 	/*
 	 * The resource size should effectively match the value of SMMU_TOP;
 	 * stash that temporarily until we know PAGESIZE to validate it with.
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 2b9b42fb6f30..703fd5817ec1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -278,6 +278,7 @@ struct arm_smmu_device {
 	struct device			*dev;
 
 	void __iomem			*base;
+	phys_addr_t			ioaddr;
 	unsigned int			numpage;
 	unsigned int			pgshift;
 
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-05-26  4:14 ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-05-26  4:14 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: Rob Clark, Sai Prakash Ranjan, linux-arm-msm, linux-kernel,
	iommu, quic_guptap, linux-arm-kernel

TLB sync timeouts can be due to various reasons such as TBU power down
or pending TCU/TBU invalidation/sync and so on. Debugging these often
require dumping of some implementation defined registers to know the
status of TBU/TCU operations and some of these registers are not
accessible in non-secure world such as from kernel and requires SMC
calls to read them in the secure world. So, add this debug support
to dump implementation defined registers for TLB sync timeout issues.

Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
---

Changes in v2:
 * Use scm call consistently so that it works on older chipsets where
   some of these regs are secure registers.
 * Add device specific data to get the implementation defined register
   offsets.

---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
 drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
 drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
 3 files changed, 146 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 7820711c4560..bb68aa85b28b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -5,13 +5,27 @@
 
 #include <linux/acpi.h>
 #include <linux/adreno-smmu-priv.h>
+#include <linux/delay.h>
 #include <linux/of_device.h>
 #include <linux/qcom_scm.h>
 
 #include "arm-smmu.h"
 
+#define QCOM_DUMMY_VAL	-1
+
+enum qcom_smmu_impl_reg_offset {
+	QCOM_SMMU_TBU_PWR_STATUS,
+	QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
+	QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
+};
+
+struct qcom_smmu_config {
+	const u32 *reg_offset;
+};
+
 struct qcom_smmu {
 	struct arm_smmu_device smmu;
+	const struct qcom_smmu_config *cfg;
 	bool bypass_quirk;
 	u8 bypass_cbndx;
 	u32 stall_enabled;
@@ -22,6 +36,56 @@ static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
 	return container_of(smmu, struct qcom_smmu, smmu);
 }
 
+static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
+				int sync, int status)
+{
+	int ret;
+	unsigned int spin_cnt, delay;
+	u32 reg, tbu_pwr_status, sync_inv_ack, sync_inv_progress;
+	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+	const struct qcom_smmu_config *cfg;
+
+	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
+	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+			reg = arm_smmu_readl(smmu, page, status);
+			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
+				return;
+			cpu_relax();
+		}
+		udelay(delay);
+	}
+
+	dev_err_ratelimited(smmu->dev,
+			    "TLB sync timed out -- SMMU may be deadlocked\n");
+
+	cfg = qsmmu->cfg;
+	if (!cfg)
+		return;
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
+				&tbu_pwr_status);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TBU power status: %d\n", ret);
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
+				&sync_inv_ack);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TBU sync/inv ack status: %d\n", ret);
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
+				&sync_inv_progress);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TCU syn/inv progress: %d\n", ret);
+
+	dev_err_ratelimited(smmu->dev,
+			    "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
+			    tbu_pwr_status, sync_inv_ack, sync_inv_progress);
+}
+
 static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
 		u32 reg)
 {
@@ -374,6 +438,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
 	.def_domain_type = qcom_smmu_def_domain_type,
 	.reset = qcom_smmu500_reset,
 	.write_s2cr = qcom_smmu_write_s2cr,
+	.tlb_sync = qcom_smmu_tlb_sync,
 };
 
 static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
@@ -382,12 +447,84 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
 	.reset = qcom_smmu500_reset,
 	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
 	.write_sctlr = qcom_adreno_smmu_write_sctlr,
+	.tlb_sync = qcom_smmu_tlb_sync,
+};
+
+/* Implementation Defined Register Space 0 register offsets */
+static const u32 qcom_smmu_impl0_reg_offset[] = {
+	[QCOM_SMMU_TBU_PWR_STATUS]		= 0x2204,
+	[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]	= 0x25dc,
+	[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]	= 0x2670,
+};
+
+static const struct qcom_smmu_config qcm2290_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc7180_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc7280_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc8180x_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm6125_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm6350_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8150_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8250_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8350_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8450_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
+	{ .compatible = "qcom,msm8998-smmu-v2" },
+	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
+	{ .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
+	{ .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
+	{ .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
+	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
+	{ .compatible = "qcom,sdm630-smmu-v2" },
+	{ .compatible = "qcom,sdm845-smmu-500" },
+	{ .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
+	{ .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
+	{ .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
+	{ .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
+	{ .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
+	{ .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
+	{ }
 };
 
 static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 		const struct arm_smmu_impl *impl)
 {
 	struct qcom_smmu *qsmmu;
+	const struct of_device_id *match;
+	const struct device_node *np = smmu->dev->of_node;
 
 	/* Check to make sure qcom_scm has finished probing */
 	if (!qcom_scm_is_available())
@@ -398,28 +535,16 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 		return ERR_PTR(-ENOMEM);
 
 	qsmmu->smmu.impl = impl;
+	match = of_match_node(qcom_smmu_impl_of_match, np);
+	if (!match)
+		goto out;
+
+	qsmmu->cfg = match->data;
 
+out:
 	return &qsmmu->smmu;
 }
 
-static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
-	{ .compatible = "qcom,msm8998-smmu-v2" },
-	{ .compatible = "qcom,qcm2290-smmu-500" },
-	{ .compatible = "qcom,sc7180-smmu-500" },
-	{ .compatible = "qcom,sc7280-smmu-500" },
-	{ .compatible = "qcom,sc8180x-smmu-500" },
-	{ .compatible = "qcom,sc8280xp-smmu-500" },
-	{ .compatible = "qcom,sdm630-smmu-v2" },
-	{ .compatible = "qcom,sdm845-smmu-500" },
-	{ .compatible = "qcom,sm6125-smmu-500" },
-	{ .compatible = "qcom,sm6350-smmu-500" },
-	{ .compatible = "qcom,sm8150-smmu-500" },
-	{ .compatible = "qcom,sm8250-smmu-500" },
-	{ .compatible = "qcom,sm8350-smmu-500" },
-	{ .compatible = "qcom,sm8450-smmu-500" },
-	{ }
-};
-
 #ifdef CONFIG_ACPI
 static struct acpi_platform_list qcom_acpi_platlist[] = {
 	{ "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 2ed3594f384e..4c5b51109835 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -2099,6 +2099,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	if (IS_ERR(smmu->base))
 		return PTR_ERR(smmu->base);
 	ioaddr = res->start;
+	smmu->ioaddr = ioaddr;
+
 	/*
 	 * The resource size should effectively match the value of SMMU_TOP;
 	 * stash that temporarily until we know PAGESIZE to validate it with.
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 2b9b42fb6f30..703fd5817ec1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -278,6 +278,7 @@ struct arm_smmu_device {
 	struct device			*dev;
 
 	void __iomem			*base;
+	phys_addr_t			ioaddr;
 	unsigned int			numpage;
 	unsigned int			pgshift;
 
-- 
2.33.1

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-05-26  4:14 ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-05-26  4:14 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson, Sai Prakash Ranjan

TLB sync timeouts can be due to various reasons such as TBU power down
or pending TCU/TBU invalidation/sync and so on. Debugging these often
require dumping of some implementation defined registers to know the
status of TBU/TCU operations and some of these registers are not
accessible in non-secure world such as from kernel and requires SMC
calls to read them in the secure world. So, add this debug support
to dump implementation defined registers for TLB sync timeout issues.

Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
---

Changes in v2:
 * Use scm call consistently so that it works on older chipsets where
   some of these regs are secure registers.
 * Add device specific data to get the implementation defined register
   offsets.

---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
 drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
 drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
 3 files changed, 146 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 7820711c4560..bb68aa85b28b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -5,13 +5,27 @@
 
 #include <linux/acpi.h>
 #include <linux/adreno-smmu-priv.h>
+#include <linux/delay.h>
 #include <linux/of_device.h>
 #include <linux/qcom_scm.h>
 
 #include "arm-smmu.h"
 
+#define QCOM_DUMMY_VAL	-1
+
+enum qcom_smmu_impl_reg_offset {
+	QCOM_SMMU_TBU_PWR_STATUS,
+	QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
+	QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
+};
+
+struct qcom_smmu_config {
+	const u32 *reg_offset;
+};
+
 struct qcom_smmu {
 	struct arm_smmu_device smmu;
+	const struct qcom_smmu_config *cfg;
 	bool bypass_quirk;
 	u8 bypass_cbndx;
 	u32 stall_enabled;
@@ -22,6 +36,56 @@ static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
 	return container_of(smmu, struct qcom_smmu, smmu);
 }
 
+static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
+				int sync, int status)
+{
+	int ret;
+	unsigned int spin_cnt, delay;
+	u32 reg, tbu_pwr_status, sync_inv_ack, sync_inv_progress;
+	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+	const struct qcom_smmu_config *cfg;
+
+	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
+	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+			reg = arm_smmu_readl(smmu, page, status);
+			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
+				return;
+			cpu_relax();
+		}
+		udelay(delay);
+	}
+
+	dev_err_ratelimited(smmu->dev,
+			    "TLB sync timed out -- SMMU may be deadlocked\n");
+
+	cfg = qsmmu->cfg;
+	if (!cfg)
+		return;
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
+				&tbu_pwr_status);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TBU power status: %d\n", ret);
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
+				&sync_inv_ack);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TBU sync/inv ack status: %d\n", ret);
+
+	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
+				&sync_inv_progress);
+	if (ret)
+		dev_err_ratelimited(smmu->dev,
+				    "Failed to read TCU syn/inv progress: %d\n", ret);
+
+	dev_err_ratelimited(smmu->dev,
+			    "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
+			    tbu_pwr_status, sync_inv_ack, sync_inv_progress);
+}
+
 static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
 		u32 reg)
 {
@@ -374,6 +438,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
 	.def_domain_type = qcom_smmu_def_domain_type,
 	.reset = qcom_smmu500_reset,
 	.write_s2cr = qcom_smmu_write_s2cr,
+	.tlb_sync = qcom_smmu_tlb_sync,
 };
 
 static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
@@ -382,12 +447,84 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
 	.reset = qcom_smmu500_reset,
 	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
 	.write_sctlr = qcom_adreno_smmu_write_sctlr,
+	.tlb_sync = qcom_smmu_tlb_sync,
+};
+
+/* Implementation Defined Register Space 0 register offsets */
+static const u32 qcom_smmu_impl0_reg_offset[] = {
+	[QCOM_SMMU_TBU_PWR_STATUS]		= 0x2204,
+	[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]	= 0x25dc,
+	[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]	= 0x2670,
+};
+
+static const struct qcom_smmu_config qcm2290_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc7180_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc7280_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc8180x_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm6125_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm6350_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8150_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8250_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8350_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct qcom_smmu_config sm8450_smmu_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
+static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
+	{ .compatible = "qcom,msm8998-smmu-v2" },
+	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
+	{ .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
+	{ .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
+	{ .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
+	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
+	{ .compatible = "qcom,sdm630-smmu-v2" },
+	{ .compatible = "qcom,sdm845-smmu-500" },
+	{ .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
+	{ .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
+	{ .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
+	{ .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
+	{ .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
+	{ .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
+	{ }
 };
 
 static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 		const struct arm_smmu_impl *impl)
 {
 	struct qcom_smmu *qsmmu;
+	const struct of_device_id *match;
+	const struct device_node *np = smmu->dev->of_node;
 
 	/* Check to make sure qcom_scm has finished probing */
 	if (!qcom_scm_is_available())
@@ -398,28 +535,16 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 		return ERR_PTR(-ENOMEM);
 
 	qsmmu->smmu.impl = impl;
+	match = of_match_node(qcom_smmu_impl_of_match, np);
+	if (!match)
+		goto out;
+
+	qsmmu->cfg = match->data;
 
+out:
 	return &qsmmu->smmu;
 }
 
-static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
-	{ .compatible = "qcom,msm8998-smmu-v2" },
-	{ .compatible = "qcom,qcm2290-smmu-500" },
-	{ .compatible = "qcom,sc7180-smmu-500" },
-	{ .compatible = "qcom,sc7280-smmu-500" },
-	{ .compatible = "qcom,sc8180x-smmu-500" },
-	{ .compatible = "qcom,sc8280xp-smmu-500" },
-	{ .compatible = "qcom,sdm630-smmu-v2" },
-	{ .compatible = "qcom,sdm845-smmu-500" },
-	{ .compatible = "qcom,sm6125-smmu-500" },
-	{ .compatible = "qcom,sm6350-smmu-500" },
-	{ .compatible = "qcom,sm8150-smmu-500" },
-	{ .compatible = "qcom,sm8250-smmu-500" },
-	{ .compatible = "qcom,sm8350-smmu-500" },
-	{ .compatible = "qcom,sm8450-smmu-500" },
-	{ }
-};
-
 #ifdef CONFIG_ACPI
 static struct acpi_platform_list qcom_acpi_platlist[] = {
 	{ "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 2ed3594f384e..4c5b51109835 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -2099,6 +2099,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	if (IS_ERR(smmu->base))
 		return PTR_ERR(smmu->base);
 	ioaddr = res->start;
+	smmu->ioaddr = ioaddr;
+
 	/*
 	 * The resource size should effectively match the value of SMMU_TOP;
 	 * stash that temporarily until we know PAGESIZE to validate it with.
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 2b9b42fb6f30..703fd5817ec1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -278,6 +278,7 @@ struct arm_smmu_device {
 	struct device			*dev;
 
 	void __iomem			*base;
+	phys_addr_t			ioaddr;
 	unsigned int			numpage;
 	unsigned int			pgshift;
 
-- 
2.33.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
  2022-05-26  4:14 ` Sai Prakash Ranjan
  (?)
@ 2022-06-08 21:22   ` Vincent Knecht
  -1 siblings, 0 replies; 25+ messages in thread
From: Vincent Knecht @ 2022-06-08 21:22 UTC (permalink / raw)
  To: Sai Prakash Ranjan, Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

Le jeudi 26 mai 2022 à 09:44 +0530, Sai Prakash Ranjan a écrit :
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>  * Use scm call consistently so that it works on older chipsets where
>    some of these regs are secure registers.
>  * Add device specific data to get the implementation defined register
>    offsets.
> 
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>  drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>  3 files changed, 146 insertions(+), 18 deletions(-)

Hi Sai, and thanks for this patch !

I've encountered TLB sync timeouts with msm8939 SoC recently.
What would be needed to add to this patch so this SoC is supported ?
Like, where could one check the values to be used in an equivalent
of qcom_smmu_impl0_reg_offset values for this SoC (if any change needed) ?
Current values are not found by simply greping in downstream/vendor dtsi/dts files...




_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-06-08 21:22   ` Vincent Knecht
  0 siblings, 0 replies; 25+ messages in thread
From: Vincent Knecht @ 2022-06-08 21:22 UTC (permalink / raw)
  To: Sai Prakash Ranjan, Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

Le jeudi 26 mai 2022 à 09:44 +0530, Sai Prakash Ranjan a écrit :
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>  * Use scm call consistently so that it works on older chipsets where
>    some of these regs are secure registers.
>  * Add device specific data to get the implementation defined register
>    offsets.
> 
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>  drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>  3 files changed, 146 insertions(+), 18 deletions(-)

Hi Sai, and thanks for this patch !

I've encountered TLB sync timeouts with msm8939 SoC recently.
What would be needed to add to this patch so this SoC is supported ?
Like, where could one check the values to be used in an equivalent
of qcom_smmu_impl0_reg_offset values for this SoC (if any change needed) ?
Current values are not found by simply greping in downstream/vendor dtsi/dts files...




^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-06-08 21:22   ` Vincent Knecht
  0 siblings, 0 replies; 25+ messages in thread
From: Vincent Knecht @ 2022-06-08 21:22 UTC (permalink / raw)
  To: Sai Prakash Ranjan, Will Deacon, Robin Murphy, Joerg Roedel
  Cc: Rob Clark, linux-arm-msm, linux-kernel, iommu, quic_guptap,
	linux-arm-kernel

Le jeudi 26 mai 2022 à 09:44 +0530, Sai Prakash Ranjan a écrit :
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>  * Use scm call consistently so that it works on older chipsets where
>    some of these regs are secure registers.
>  * Add device specific data to get the implementation defined register
>    offsets.
> 
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>  drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>  3 files changed, 146 insertions(+), 18 deletions(-)

Hi Sai, and thanks for this patch !

I've encountered TLB sync timeouts with msm8939 SoC recently.
What would be needed to add to this patch so this SoC is supported ?
Like, where could one check the values to be used in an equivalent
of qcom_smmu_impl0_reg_offset values for this SoC (if any change needed) ?
Current values are not found by simply greping in downstream/vendor dtsi/dts files...



_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
  2022-06-08 21:22   ` Vincent Knecht
  (?)
@ 2022-06-09  8:00     ` Sai Prakash Ranjan
  -1 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-06-09  8:00 UTC (permalink / raw)
  To: Vincent Knecht, Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

Hi Vincent,

On 6/9/2022 2:52 AM, Vincent Knecht wrote:
> Le jeudi 26 mai 2022 à 09:44 +0530, Sai Prakash Ranjan a écrit :
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
> Hi Sai, and thanks for this patch !
>
> I've encountered TLB sync timeouts with msm8939 SoC recently.
> What would be needed to add to this patch so this SoC is supported ?
> Like, where could one check the values to be used in an equivalent
> of qcom_smmu_impl0_reg_offset values for this SoC (if any change needed) ?
> Current values are not found by simply greping in downstream/vendor dtsi/dts files...

These are implementation defined registers and some might not be present on older SoCs
and sometimes they don't add this support in downstream kernels even if the registers
are present.

I looked up the IP doc for msm8939 and I could find only TBU_PWR_STATUS register and
you can use the same offset for it as given in this patch.

Thanks,
Sai

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-06-09  8:00     ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-06-09  8:00 UTC (permalink / raw)
  To: Vincent Knecht, Will Deacon, Robin Murphy, Joerg Roedel
  Cc: Rob Clark, linux-arm-msm, linux-kernel, iommu, quic_guptap,
	linux-arm-kernel

Hi Vincent,

On 6/9/2022 2:52 AM, Vincent Knecht wrote:
> Le jeudi 26 mai 2022 à 09:44 +0530, Sai Prakash Ranjan a écrit :
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
> Hi Sai, and thanks for this patch !
>
> I've encountered TLB sync timeouts with msm8939 SoC recently.
> What would be needed to add to this patch so this SoC is supported ?
> Like, where could one check the values to be used in an equivalent
> of qcom_smmu_impl0_reg_offset values for this SoC (if any change needed) ?
> Current values are not found by simply greping in downstream/vendor dtsi/dts files...

These are implementation defined registers and some might not be present on older SoCs
and sometimes they don't add this support in downstream kernels even if the registers
are present.

I looked up the IP doc for msm8939 and I could find only TBU_PWR_STATUS register and
you can use the same offset for it as given in this patch.

Thanks,
Sai
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-06-09  8:00     ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-06-09  8:00 UTC (permalink / raw)
  To: Vincent Knecht, Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

Hi Vincent,

On 6/9/2022 2:52 AM, Vincent Knecht wrote:
> Le jeudi 26 mai 2022 à 09:44 +0530, Sai Prakash Ranjan a écrit :
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
> Hi Sai, and thanks for this patch !
>
> I've encountered TLB sync timeouts with msm8939 SoC recently.
> What would be needed to add to this patch so this SoC is supported ?
> Like, where could one check the values to be used in an equivalent
> of qcom_smmu_impl0_reg_offset values for this SoC (if any change needed) ?
> Current values are not found by simply greping in downstream/vendor dtsi/dts files...

These are implementation defined registers and some might not be present on older SoCs
and sometimes they don't add this support in downstream kernels even if the registers
are present.

I looked up the IP doc for msm8939 and I could find only TBU_PWR_STATUS register and
you can use the same offset for it as given in this patch.

Thanks,
Sai

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
  2022-05-26  4:14 ` Sai Prakash Ranjan
  (?)
@ 2022-06-23  6:02   ` Sai Prakash Ranjan
  -1 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-06-23  6:02 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

On 5/26/2022 9:44 AM, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
>
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
>
> Changes in v2:
>   * Use scm call consistently so that it works on older chipsets where
>     some of these regs are secure registers.
>   * Add device specific data to get the implementation defined register
>     offsets.
>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>   3 files changed, 146 insertions(+), 18 deletions(-)

Any comments on this patch?

Thanks,
Sai

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-06-23  6:02   ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-06-23  6:02 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: Rob Clark, linux-arm-msm, linux-kernel, iommu, quic_guptap,
	linux-arm-kernel

On 5/26/2022 9:44 AM, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
>
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
>
> Changes in v2:
>   * Use scm call consistently so that it works on older chipsets where
>     some of these regs are secure registers.
>   * Add device specific data to get the implementation defined register
>     offsets.
>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>   3 files changed, 146 insertions(+), 18 deletions(-)

Any comments on this patch?

Thanks,
Sai
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-06-23  6:02   ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-06-23  6:02 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

On 5/26/2022 9:44 AM, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
>
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
>
> Changes in v2:
>   * Use scm call consistently so that it works on older chipsets where
>     some of these regs are secure registers.
>   * Add device specific data to get the implementation defined register
>     offsets.
>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>   3 files changed, 146 insertions(+), 18 deletions(-)

Any comments on this patch?

Thanks,
Sai

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
  2022-06-23  6:02   ` Sai Prakash Ranjan
  (?)
@ 2022-07-05  5:05     ` Sai Prakash Ranjan
  -1 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-07-05  5:05 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

On 6/23/2022 11:32 AM, Sai Prakash Ranjan wrote:
> On 5/26/2022 9:44 AM, Sai Prakash Ranjan wrote:
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
>
> Any comments on this patch?

Gentle Ping !!

Thanks,
Sai

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-07-05  5:05     ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-07-05  5:05 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: Rob Clark, linux-arm-msm, linux-kernel, iommu, quic_guptap,
	linux-arm-kernel

On 6/23/2022 11:32 AM, Sai Prakash Ranjan wrote:
> On 5/26/2022 9:44 AM, Sai Prakash Ranjan wrote:
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
>
> Any comments on this patch?

Gentle Ping !!

Thanks,
Sai
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-07-05  5:05     ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-07-05  5:05 UTC (permalink / raw)
  To: Will Deacon, Robin Murphy, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

On 6/23/2022 11:32 AM, Sai Prakash Ranjan wrote:
> On 5/26/2022 9:44 AM, Sai Prakash Ranjan wrote:
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
>
> Any comments on this patch?

Gentle Ping !!

Thanks,
Sai

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
  2022-05-26  4:14 ` Sai Prakash Ranjan
  (?)
@ 2022-07-06 11:56   ` Will Deacon
  -1 siblings, 0 replies; 25+ messages in thread
From: Will Deacon @ 2022-07-06 11:56 UTC (permalink / raw)
  To: Sai Prakash Ranjan
  Cc: Robin Murphy, Joerg Roedel, iommu, linux-arm-kernel,
	linux-kernel, linux-arm-msm, quic_guptap, Rob Clark,
	Bjorn Andersson

On Thu, May 26, 2022 at 09:44:03AM +0530, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>  * Use scm call consistently so that it works on older chipsets where
>    some of these regs are secure registers.
>  * Add device specific data to get the implementation defined register
>    offsets.
> 
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>  drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>  3 files changed, 146 insertions(+), 18 deletions(-)

If this is useful to you, then I suppose it's something we could support,
however I'm pretty worried about our ability to maintain/scale this stuff
as it is extended to support additional SoCs and other custom debugging
features.

Perhaps you could stick it all in arm-smmu-qcom-debug.c and have a new
config option for that, so at least it's even further out of the way?

Will

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-07-06 11:56   ` Will Deacon
  0 siblings, 0 replies; 25+ messages in thread
From: Will Deacon @ 2022-07-06 11:56 UTC (permalink / raw)
  To: Sai Prakash Ranjan
  Cc: Rob Clark, linux-arm-msm, linux-kernel, iommu, quic_guptap,
	Robin Murphy, linux-arm-kernel

On Thu, May 26, 2022 at 09:44:03AM +0530, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>  * Use scm call consistently so that it works on older chipsets where
>    some of these regs are secure registers.
>  * Add device specific data to get the implementation defined register
>    offsets.
> 
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>  drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>  3 files changed, 146 insertions(+), 18 deletions(-)

If this is useful to you, then I suppose it's something we could support,
however I'm pretty worried about our ability to maintain/scale this stuff
as it is extended to support additional SoCs and other custom debugging
features.

Perhaps you could stick it all in arm-smmu-qcom-debug.c and have a new
config option for that, so at least it's even further out of the way?

Will
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-07-06 11:56   ` Will Deacon
  0 siblings, 0 replies; 25+ messages in thread
From: Will Deacon @ 2022-07-06 11:56 UTC (permalink / raw)
  To: Sai Prakash Ranjan
  Cc: Robin Murphy, Joerg Roedel, iommu, linux-arm-kernel,
	linux-kernel, linux-arm-msm, quic_guptap, Rob Clark,
	Bjorn Andersson

On Thu, May 26, 2022 at 09:44:03AM +0530, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>  * Use scm call consistently so that it works on older chipsets where
>    some of these regs are secure registers.
>  * Add device specific data to get the implementation defined register
>    offsets.
> 
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>  drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>  3 files changed, 146 insertions(+), 18 deletions(-)

If this is useful to you, then I suppose it's something we could support,
however I'm pretty worried about our ability to maintain/scale this stuff
as it is extended to support additional SoCs and other custom debugging
features.

Perhaps you could stick it all in arm-smmu-qcom-debug.c and have a new
config option for that, so at least it's even further out of the way?

Will

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
  2022-05-26  4:14 ` Sai Prakash Ranjan
  (?)
@ 2022-07-06 16:45   ` Robin Murphy
  -1 siblings, 0 replies; 25+ messages in thread
From: Robin Murphy @ 2022-07-06 16:45 UTC (permalink / raw)
  To: Sai Prakash Ranjan, Will Deacon, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

On 2022-05-26 05:14, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>   * Use scm call consistently so that it works on older chipsets where
>     some of these regs are secure registers.
>   * Add device specific data to get the implementation defined register
>     offsets.
> 
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>   3 files changed, 146 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index 7820711c4560..bb68aa85b28b 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -5,13 +5,27 @@
>   
>   #include <linux/acpi.h>
>   #include <linux/adreno-smmu-priv.h>
> +#include <linux/delay.h>
>   #include <linux/of_device.h>
>   #include <linux/qcom_scm.h>
>   
>   #include "arm-smmu.h"
>   
> +#define QCOM_DUMMY_VAL	-1
> +
> +enum qcom_smmu_impl_reg_offset {
> +	QCOM_SMMU_TBU_PWR_STATUS,
> +	QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
> +	QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
> +};
> +
> +struct qcom_smmu_config {
> +	const u32 *reg_offset;
> +};
> +
>   struct qcom_smmu {
>   	struct arm_smmu_device smmu;
> +	const struct qcom_smmu_config *cfg;
>   	bool bypass_quirk;
>   	u8 bypass_cbndx;
>   	u32 stall_enabled;
> @@ -22,6 +36,56 @@ static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>   	return container_of(smmu, struct qcom_smmu, smmu);
>   }
>   
> +static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
> +				int sync, int status)
> +{
> +	int ret;
> +	unsigned int spin_cnt, delay;
> +	u32 reg, tbu_pwr_status, sync_inv_ack, sync_inv_progress;
> +	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
> +	const struct qcom_smmu_config *cfg;
> +
> +	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
> +	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
> +		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
> +			reg = arm_smmu_readl(smmu, page, status);
> +			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
> +				return;
> +			cpu_relax();
> +		}
> +		udelay(delay);
> +	}
> +
> +	dev_err_ratelimited(smmu->dev,
> +			    "TLB sync timed out -- SMMU may be deadlocked\n");

Maybe consider a single ratelimit state for the whole function so all 
the output stays together. If things go sufficiently wrong, mixed up 
bits of partial output from different events may be misleadingly 
unhelpful (and at the very least it'll be up to 5x more effective at the 
intent of limiting log spam).

> +	cfg = qsmmu->cfg;
> +	if (!cfg)
> +		return;
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
> +				&tbu_pwr_status);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TBU power status: %d\n", ret);
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
> +				&sync_inv_ack);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TBU sync/inv ack status: %d\n", ret);
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
> +				&sync_inv_progress);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TCU syn/inv progress: %d\n", ret);
> +
> +	dev_err_ratelimited(smmu->dev,
> +			    "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
> +			    tbu_pwr_status, sync_inv_ack, sync_inv_progress);
> +}
> +
>   static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
>   		u32 reg)
>   {
> @@ -374,6 +438,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
>   	.def_domain_type = qcom_smmu_def_domain_type,
>   	.reset = qcom_smmu500_reset,
>   	.write_s2cr = qcom_smmu_write_s2cr,
> +	.tlb_sync = qcom_smmu_tlb_sync,
>   };
>   
>   static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
> @@ -382,12 +447,84 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
>   	.reset = qcom_smmu500_reset,
>   	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
>   	.write_sctlr = qcom_adreno_smmu_write_sctlr,
> +	.tlb_sync = qcom_smmu_tlb_sync,
> +};
> +
> +/* Implementation Defined Register Space 0 register offsets */
> +static const u32 qcom_smmu_impl0_reg_offset[] = {
> +	[QCOM_SMMU_TBU_PWR_STATUS]		= 0x2204,
> +	[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]	= 0x25dc,
> +	[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]	= 0x2670,
> +};
> +
> +static const struct qcom_smmu_config qcm2290_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc7180_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc7280_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc8180x_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm6125_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm6350_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8150_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8250_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8350_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8450_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
> +	{ .compatible = "qcom,msm8998-smmu-v2" },
> +	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
> +	{ .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
> +	{ .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
> +	{ .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
> +	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
> +	{ .compatible = "qcom,sdm630-smmu-v2" },
> +	{ .compatible = "qcom,sdm845-smmu-500" },
> +	{ .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
> +	{ .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
> +	{ .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
> +	{ .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
> +	{ .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
> +	{ .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
> +	{ }
>   };
>   
>   static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>   		const struct arm_smmu_impl *impl)
>   {
>   	struct qcom_smmu *qsmmu;
> +	const struct of_device_id *match;
> +	const struct device_node *np = smmu->dev->of_node;
>   
>   	/* Check to make sure qcom_scm has finished probing */
>   	if (!qcom_scm_is_available())
> @@ -398,28 +535,16 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>   		return ERR_PTR(-ENOMEM);
>   
>   	qsmmu->smmu.impl = impl;
> +	match = of_match_node(qcom_smmu_impl_of_match, np);
> +	if (!match)
> +		goto out;
> +
> +	qsmmu->cfg = match->data;

I haven't been the of_device_get_match_data() police for quite some time 
now, but it's never too late :)

> +out:
>   	return &qsmmu->smmu;
>   }
>   
> -static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
> -	{ .compatible = "qcom,msm8998-smmu-v2" },
> -	{ .compatible = "qcom,qcm2290-smmu-500" },
> -	{ .compatible = "qcom,sc7180-smmu-500" },
> -	{ .compatible = "qcom,sc7280-smmu-500" },
> -	{ .compatible = "qcom,sc8180x-smmu-500" },
> -	{ .compatible = "qcom,sc8280xp-smmu-500" },
> -	{ .compatible = "qcom,sdm630-smmu-v2" },
> -	{ .compatible = "qcom,sdm845-smmu-500" },
> -	{ .compatible = "qcom,sm6125-smmu-500" },
> -	{ .compatible = "qcom,sm6350-smmu-500" },
> -	{ .compatible = "qcom,sm8150-smmu-500" },
> -	{ .compatible = "qcom,sm8250-smmu-500" },
> -	{ .compatible = "qcom,sm8350-smmu-500" },
> -	{ .compatible = "qcom,sm8450-smmu-500" },
> -	{ }
> -};
> -
>   #ifdef CONFIG_ACPI
>   static struct acpi_platform_list qcom_acpi_platlist[] = {
>   	{ "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 2ed3594f384e..4c5b51109835 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -2099,6 +2099,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>   	if (IS_ERR(smmu->base))
>   		return PTR_ERR(smmu->base);
>   	ioaddr = res->start;
> +	smmu->ioaddr = ioaddr;

It slightly bothers me to add something to the common structure that's 
only needed by some weird imp-def feature, but there's plenty of wasted 
space in there already, and I suppose it is information that the core 
driver does at least use in passing, so overall I think that's a 
resounding "meh". Maybe remove the local variable entirely to make it 
look less redundant?

Thanks,
Robin.

> +
>   	/*
>   	 * The resource size should effectively match the value of SMMU_TOP;
>   	 * stash that temporarily until we know PAGESIZE to validate it with.
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..703fd5817ec1 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -278,6 +278,7 @@ struct arm_smmu_device {
>   	struct device			*dev;
>   
>   	void __iomem			*base;
> +	phys_addr_t			ioaddr;
>   	unsigned int			numpage;
>   	unsigned int			pgshift;
>   

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-07-06 16:45   ` Robin Murphy
  0 siblings, 0 replies; 25+ messages in thread
From: Robin Murphy @ 2022-07-06 16:45 UTC (permalink / raw)
  To: Sai Prakash Ranjan, Will Deacon, Joerg Roedel
  Cc: Rob Clark, linux-arm-msm, linux-kernel, iommu, quic_guptap,
	linux-arm-kernel

On 2022-05-26 05:14, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>   * Use scm call consistently so that it works on older chipsets where
>     some of these regs are secure registers.
>   * Add device specific data to get the implementation defined register
>     offsets.
> 
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>   3 files changed, 146 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index 7820711c4560..bb68aa85b28b 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -5,13 +5,27 @@
>   
>   #include <linux/acpi.h>
>   #include <linux/adreno-smmu-priv.h>
> +#include <linux/delay.h>
>   #include <linux/of_device.h>
>   #include <linux/qcom_scm.h>
>   
>   #include "arm-smmu.h"
>   
> +#define QCOM_DUMMY_VAL	-1
> +
> +enum qcom_smmu_impl_reg_offset {
> +	QCOM_SMMU_TBU_PWR_STATUS,
> +	QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
> +	QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
> +};
> +
> +struct qcom_smmu_config {
> +	const u32 *reg_offset;
> +};
> +
>   struct qcom_smmu {
>   	struct arm_smmu_device smmu;
> +	const struct qcom_smmu_config *cfg;
>   	bool bypass_quirk;
>   	u8 bypass_cbndx;
>   	u32 stall_enabled;
> @@ -22,6 +36,56 @@ static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>   	return container_of(smmu, struct qcom_smmu, smmu);
>   }
>   
> +static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
> +				int sync, int status)
> +{
> +	int ret;
> +	unsigned int spin_cnt, delay;
> +	u32 reg, tbu_pwr_status, sync_inv_ack, sync_inv_progress;
> +	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
> +	const struct qcom_smmu_config *cfg;
> +
> +	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
> +	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
> +		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
> +			reg = arm_smmu_readl(smmu, page, status);
> +			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
> +				return;
> +			cpu_relax();
> +		}
> +		udelay(delay);
> +	}
> +
> +	dev_err_ratelimited(smmu->dev,
> +			    "TLB sync timed out -- SMMU may be deadlocked\n");

Maybe consider a single ratelimit state for the whole function so all 
the output stays together. If things go sufficiently wrong, mixed up 
bits of partial output from different events may be misleadingly 
unhelpful (and at the very least it'll be up to 5x more effective at the 
intent of limiting log spam).

> +	cfg = qsmmu->cfg;
> +	if (!cfg)
> +		return;
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
> +				&tbu_pwr_status);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TBU power status: %d\n", ret);
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
> +				&sync_inv_ack);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TBU sync/inv ack status: %d\n", ret);
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
> +				&sync_inv_progress);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TCU syn/inv progress: %d\n", ret);
> +
> +	dev_err_ratelimited(smmu->dev,
> +			    "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
> +			    tbu_pwr_status, sync_inv_ack, sync_inv_progress);
> +}
> +
>   static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
>   		u32 reg)
>   {
> @@ -374,6 +438,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
>   	.def_domain_type = qcom_smmu_def_domain_type,
>   	.reset = qcom_smmu500_reset,
>   	.write_s2cr = qcom_smmu_write_s2cr,
> +	.tlb_sync = qcom_smmu_tlb_sync,
>   };
>   
>   static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
> @@ -382,12 +447,84 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
>   	.reset = qcom_smmu500_reset,
>   	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
>   	.write_sctlr = qcom_adreno_smmu_write_sctlr,
> +	.tlb_sync = qcom_smmu_tlb_sync,
> +};
> +
> +/* Implementation Defined Register Space 0 register offsets */
> +static const u32 qcom_smmu_impl0_reg_offset[] = {
> +	[QCOM_SMMU_TBU_PWR_STATUS]		= 0x2204,
> +	[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]	= 0x25dc,
> +	[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]	= 0x2670,
> +};
> +
> +static const struct qcom_smmu_config qcm2290_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc7180_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc7280_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc8180x_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm6125_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm6350_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8150_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8250_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8350_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8450_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
> +	{ .compatible = "qcom,msm8998-smmu-v2" },
> +	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
> +	{ .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
> +	{ .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
> +	{ .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
> +	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
> +	{ .compatible = "qcom,sdm630-smmu-v2" },
> +	{ .compatible = "qcom,sdm845-smmu-500" },
> +	{ .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
> +	{ .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
> +	{ .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
> +	{ .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
> +	{ .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
> +	{ .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
> +	{ }
>   };
>   
>   static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>   		const struct arm_smmu_impl *impl)
>   {
>   	struct qcom_smmu *qsmmu;
> +	const struct of_device_id *match;
> +	const struct device_node *np = smmu->dev->of_node;
>   
>   	/* Check to make sure qcom_scm has finished probing */
>   	if (!qcom_scm_is_available())
> @@ -398,28 +535,16 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>   		return ERR_PTR(-ENOMEM);
>   
>   	qsmmu->smmu.impl = impl;
> +	match = of_match_node(qcom_smmu_impl_of_match, np);
> +	if (!match)
> +		goto out;
> +
> +	qsmmu->cfg = match->data;

I haven't been the of_device_get_match_data() police for quite some time 
now, but it's never too late :)

> +out:
>   	return &qsmmu->smmu;
>   }
>   
> -static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
> -	{ .compatible = "qcom,msm8998-smmu-v2" },
> -	{ .compatible = "qcom,qcm2290-smmu-500" },
> -	{ .compatible = "qcom,sc7180-smmu-500" },
> -	{ .compatible = "qcom,sc7280-smmu-500" },
> -	{ .compatible = "qcom,sc8180x-smmu-500" },
> -	{ .compatible = "qcom,sc8280xp-smmu-500" },
> -	{ .compatible = "qcom,sdm630-smmu-v2" },
> -	{ .compatible = "qcom,sdm845-smmu-500" },
> -	{ .compatible = "qcom,sm6125-smmu-500" },
> -	{ .compatible = "qcom,sm6350-smmu-500" },
> -	{ .compatible = "qcom,sm8150-smmu-500" },
> -	{ .compatible = "qcom,sm8250-smmu-500" },
> -	{ .compatible = "qcom,sm8350-smmu-500" },
> -	{ .compatible = "qcom,sm8450-smmu-500" },
> -	{ }
> -};
> -
>   #ifdef CONFIG_ACPI
>   static struct acpi_platform_list qcom_acpi_platlist[] = {
>   	{ "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 2ed3594f384e..4c5b51109835 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -2099,6 +2099,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>   	if (IS_ERR(smmu->base))
>   		return PTR_ERR(smmu->base);
>   	ioaddr = res->start;
> +	smmu->ioaddr = ioaddr;

It slightly bothers me to add something to the common structure that's 
only needed by some weird imp-def feature, but there's plenty of wasted 
space in there already, and I suppose it is information that the core 
driver does at least use in passing, so overall I think that's a 
resounding "meh". Maybe remove the local variable entirely to make it 
look less redundant?

Thanks,
Robin.

> +
>   	/*
>   	 * The resource size should effectively match the value of SMMU_TOP;
>   	 * stash that temporarily until we know PAGESIZE to validate it with.
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..703fd5817ec1 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -278,6 +278,7 @@ struct arm_smmu_device {
>   	struct device			*dev;
>   
>   	void __iomem			*base;
> +	phys_addr_t			ioaddr;
>   	unsigned int			numpage;
>   	unsigned int			pgshift;
>   
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-07-06 16:45   ` Robin Murphy
  0 siblings, 0 replies; 25+ messages in thread
From: Robin Murphy @ 2022-07-06 16:45 UTC (permalink / raw)
  To: Sai Prakash Ranjan, Will Deacon, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

On 2022-05-26 05:14, Sai Prakash Ranjan wrote:
> TLB sync timeouts can be due to various reasons such as TBU power down
> or pending TCU/TBU invalidation/sync and so on. Debugging these often
> require dumping of some implementation defined registers to know the
> status of TBU/TCU operations and some of these registers are not
> accessible in non-secure world such as from kernel and requires SMC
> calls to read them in the secure world. So, add this debug support
> to dump implementation defined registers for TLB sync timeout issues.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
> 
> Changes in v2:
>   * Use scm call consistently so that it works on older chipsets where
>     some of these regs are secure registers.
>   * Add device specific data to get the implementation defined register
>     offsets.
> 
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>   3 files changed, 146 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index 7820711c4560..bb68aa85b28b 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -5,13 +5,27 @@
>   
>   #include <linux/acpi.h>
>   #include <linux/adreno-smmu-priv.h>
> +#include <linux/delay.h>
>   #include <linux/of_device.h>
>   #include <linux/qcom_scm.h>
>   
>   #include "arm-smmu.h"
>   
> +#define QCOM_DUMMY_VAL	-1
> +
> +enum qcom_smmu_impl_reg_offset {
> +	QCOM_SMMU_TBU_PWR_STATUS,
> +	QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
> +	QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
> +};
> +
> +struct qcom_smmu_config {
> +	const u32 *reg_offset;
> +};
> +
>   struct qcom_smmu {
>   	struct arm_smmu_device smmu;
> +	const struct qcom_smmu_config *cfg;
>   	bool bypass_quirk;
>   	u8 bypass_cbndx;
>   	u32 stall_enabled;
> @@ -22,6 +36,56 @@ static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>   	return container_of(smmu, struct qcom_smmu, smmu);
>   }
>   
> +static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
> +				int sync, int status)
> +{
> +	int ret;
> +	unsigned int spin_cnt, delay;
> +	u32 reg, tbu_pwr_status, sync_inv_ack, sync_inv_progress;
> +	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
> +	const struct qcom_smmu_config *cfg;
> +
> +	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
> +	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
> +		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
> +			reg = arm_smmu_readl(smmu, page, status);
> +			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
> +				return;
> +			cpu_relax();
> +		}
> +		udelay(delay);
> +	}
> +
> +	dev_err_ratelimited(smmu->dev,
> +			    "TLB sync timed out -- SMMU may be deadlocked\n");

Maybe consider a single ratelimit state for the whole function so all 
the output stays together. If things go sufficiently wrong, mixed up 
bits of partial output from different events may be misleadingly 
unhelpful (and at the very least it'll be up to 5x more effective at the 
intent of limiting log spam).

> +	cfg = qsmmu->cfg;
> +	if (!cfg)
> +		return;
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
> +				&tbu_pwr_status);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TBU power status: %d\n", ret);
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
> +				&sync_inv_ack);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TBU sync/inv ack status: %d\n", ret);
> +
> +	ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
> +				&sync_inv_progress);
> +	if (ret)
> +		dev_err_ratelimited(smmu->dev,
> +				    "Failed to read TCU syn/inv progress: %d\n", ret);
> +
> +	dev_err_ratelimited(smmu->dev,
> +			    "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
> +			    tbu_pwr_status, sync_inv_ack, sync_inv_progress);
> +}
> +
>   static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
>   		u32 reg)
>   {
> @@ -374,6 +438,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
>   	.def_domain_type = qcom_smmu_def_domain_type,
>   	.reset = qcom_smmu500_reset,
>   	.write_s2cr = qcom_smmu_write_s2cr,
> +	.tlb_sync = qcom_smmu_tlb_sync,
>   };
>   
>   static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
> @@ -382,12 +447,84 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
>   	.reset = qcom_smmu500_reset,
>   	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
>   	.write_sctlr = qcom_adreno_smmu_write_sctlr,
> +	.tlb_sync = qcom_smmu_tlb_sync,
> +};
> +
> +/* Implementation Defined Register Space 0 register offsets */
> +static const u32 qcom_smmu_impl0_reg_offset[] = {
> +	[QCOM_SMMU_TBU_PWR_STATUS]		= 0x2204,
> +	[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]	= 0x25dc,
> +	[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]	= 0x2670,
> +};
> +
> +static const struct qcom_smmu_config qcm2290_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc7180_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc7280_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc8180x_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm6125_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm6350_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8150_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8250_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8350_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct qcom_smmu_config sm8450_smmu_cfg = {
> +	.reg_offset = qcom_smmu_impl0_reg_offset,
> +};
> +
> +static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
> +	{ .compatible = "qcom,msm8998-smmu-v2" },
> +	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
> +	{ .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
> +	{ .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
> +	{ .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
> +	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
> +	{ .compatible = "qcom,sdm630-smmu-v2" },
> +	{ .compatible = "qcom,sdm845-smmu-500" },
> +	{ .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
> +	{ .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
> +	{ .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
> +	{ .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
> +	{ .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
> +	{ .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
> +	{ }
>   };
>   
>   static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>   		const struct arm_smmu_impl *impl)
>   {
>   	struct qcom_smmu *qsmmu;
> +	const struct of_device_id *match;
> +	const struct device_node *np = smmu->dev->of_node;
>   
>   	/* Check to make sure qcom_scm has finished probing */
>   	if (!qcom_scm_is_available())
> @@ -398,28 +535,16 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>   		return ERR_PTR(-ENOMEM);
>   
>   	qsmmu->smmu.impl = impl;
> +	match = of_match_node(qcom_smmu_impl_of_match, np);
> +	if (!match)
> +		goto out;
> +
> +	qsmmu->cfg = match->data;

I haven't been the of_device_get_match_data() police for quite some time 
now, but it's never too late :)

> +out:
>   	return &qsmmu->smmu;
>   }
>   
> -static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
> -	{ .compatible = "qcom,msm8998-smmu-v2" },
> -	{ .compatible = "qcom,qcm2290-smmu-500" },
> -	{ .compatible = "qcom,sc7180-smmu-500" },
> -	{ .compatible = "qcom,sc7280-smmu-500" },
> -	{ .compatible = "qcom,sc8180x-smmu-500" },
> -	{ .compatible = "qcom,sc8280xp-smmu-500" },
> -	{ .compatible = "qcom,sdm630-smmu-v2" },
> -	{ .compatible = "qcom,sdm845-smmu-500" },
> -	{ .compatible = "qcom,sm6125-smmu-500" },
> -	{ .compatible = "qcom,sm6350-smmu-500" },
> -	{ .compatible = "qcom,sm8150-smmu-500" },
> -	{ .compatible = "qcom,sm8250-smmu-500" },
> -	{ .compatible = "qcom,sm8350-smmu-500" },
> -	{ .compatible = "qcom,sm8450-smmu-500" },
> -	{ }
> -};
> -
>   #ifdef CONFIG_ACPI
>   static struct acpi_platform_list qcom_acpi_platlist[] = {
>   	{ "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 2ed3594f384e..4c5b51109835 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -2099,6 +2099,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>   	if (IS_ERR(smmu->base))
>   		return PTR_ERR(smmu->base);
>   	ioaddr = res->start;
> +	smmu->ioaddr = ioaddr;

It slightly bothers me to add something to the common structure that's 
only needed by some weird imp-def feature, but there's plenty of wasted 
space in there already, and I suppose it is information that the core 
driver does at least use in passing, so overall I think that's a 
resounding "meh". Maybe remove the local variable entirely to make it 
look less redundant?

Thanks,
Robin.

> +
>   	/*
>   	 * The resource size should effectively match the value of SMMU_TOP;
>   	 * stash that temporarily until we know PAGESIZE to validate it with.
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..703fd5817ec1 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -278,6 +278,7 @@ struct arm_smmu_device {
>   	struct device			*dev;
>   
>   	void __iomem			*base;
> +	phys_addr_t			ioaddr;
>   	unsigned int			numpage;
>   	unsigned int			pgshift;
>   

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
  2022-07-06 11:56   ` Will Deacon
@ 2022-07-07  6:20     ` Sai Prakash Ranjan
  -1 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-07-07  6:20 UTC (permalink / raw)
  To: Will Deacon
  Cc: Robin Murphy, Joerg Roedel, iommu, linux-arm-kernel,
	linux-kernel, linux-arm-msm, quic_guptap, Rob Clark,
	Bjorn Andersson

On 7/6/2022 5:26 PM, Will Deacon wrote:
> On Thu, May 26, 2022 at 09:44:03AM +0530, Sai Prakash Ranjan wrote:
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
> If this is useful to you, then I suppose it's something we could support,
> however I'm pretty worried about our ability to maintain/scale this stuff
> as it is extended to support additional SoCs and other custom debugging
> features.
>
> Perhaps you could stick it all in arm-smmu-qcom-debug.c and have a new
> config option for that, so at least it's even further out of the way?
>
> Will

Sounds good to me, will do that.

Thanks,
Sai

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-07-07  6:20     ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-07-07  6:20 UTC (permalink / raw)
  To: Will Deacon
  Cc: Robin Murphy, Joerg Roedel, iommu, linux-arm-kernel,
	linux-kernel, linux-arm-msm, quic_guptap, Rob Clark,
	Bjorn Andersson

On 7/6/2022 5:26 PM, Will Deacon wrote:
> On Thu, May 26, 2022 at 09:44:03AM +0530, Sai Prakash Ranjan wrote:
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
> If this is useful to you, then I suppose it's something we could support,
> however I'm pretty worried about our ability to maintain/scale this stuff
> as it is extended to support additional SoCs and other custom debugging
> features.
>
> Perhaps you could stick it all in arm-smmu-qcom-debug.c and have a new
> config option for that, so at least it's even further out of the way?
>
> Will

Sounds good to me, will do that.

Thanks,
Sai

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
  2022-07-06 16:45   ` Robin Murphy
@ 2022-07-07  7:53     ` Sai Prakash Ranjan
  -1 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-07-07  7:53 UTC (permalink / raw)
  To: Robin Murphy, Will Deacon, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

Hi Robin,

On 7/6/2022 10:15 PM, Robin Murphy wrote:
> On 2022-05-26 05:14, Sai Prakash Ranjan wrote:
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> index 7820711c4560..bb68aa85b28b 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> @@ -5,13 +5,27 @@
>>     #include <linux/acpi.h>
>>   #include <linux/adreno-smmu-priv.h>
>> +#include <linux/delay.h>
>>   #include <linux/of_device.h>
>>   #include <linux/qcom_scm.h>
>>     #include "arm-smmu.h"
>>   +#define QCOM_DUMMY_VAL    -1
>> +
>> +enum qcom_smmu_impl_reg_offset {
>> +    QCOM_SMMU_TBU_PWR_STATUS,
>> +    QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
>> +    QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
>> +};
>> +
>> +struct qcom_smmu_config {
>> +    const u32 *reg_offset;
>> +};
>> +
>>   struct qcom_smmu {
>>       struct arm_smmu_device smmu;
>> +    const struct qcom_smmu_config *cfg;
>>       bool bypass_quirk;
>>       u8 bypass_cbndx;
>>       u32 stall_enabled;
>> @@ -22,6 +36,56 @@ static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>       return container_of(smmu, struct qcom_smmu, smmu);
>>   }
>>   +static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
>> +                int sync, int status)
>> +{
>> +    int ret;
>> +    unsigned int spin_cnt, delay;
>> +    u32 reg, tbu_pwr_status, sync_inv_ack, sync_inv_progress;
>> +    struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
>> +    const struct qcom_smmu_config *cfg;
>> +
>> +    arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
>> +    for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
>> +        for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
>> +            reg = arm_smmu_readl(smmu, page, status);
>> +            if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
>> +                return;
>> +            cpu_relax();
>> +        }
>> +        udelay(delay);
>> +    }
>> +
>> +    dev_err_ratelimited(smmu->dev,
>> +                "TLB sync timed out -- SMMU may be deadlocked\n");
>
> Maybe consider a single ratelimit state for the whole function so all the output stays together. If things go sufficiently wrong, mixed up bits of partial output from different events may be misleadingly unhelpful (and at the very least it'll be up to 5x more effective at the intent of limiting log spam).
>

Right, makes sense. Will change it.

>> +    cfg = qsmmu->cfg;
>> +    if (!cfg)
>> +        return;
>> +
>> +    ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
>> +                &tbu_pwr_status);
>> +    if (ret)
>> +        dev_err_ratelimited(smmu->dev,
>> +                    "Failed to read TBU power status: %d\n", ret);
>> +
>> +    ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
>> +                &sync_inv_ack);
>> +    if (ret)
>> +        dev_err_ratelimited(smmu->dev,
>> +                    "Failed to read TBU sync/inv ack status: %d\n", ret);
>> +
>> +    ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
>> +                &sync_inv_progress);
>> +    if (ret)
>> +        dev_err_ratelimited(smmu->dev,
>> +                    "Failed to read TCU syn/inv progress: %d\n", ret);
>> +
>> +    dev_err_ratelimited(smmu->dev,
>> +                "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
>> +                tbu_pwr_status, sync_inv_ack, sync_inv_progress);
>> +}
>> +
>>   static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
>>           u32 reg)
>>   {
>> @@ -374,6 +438,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
>>       .def_domain_type = qcom_smmu_def_domain_type,
>>       .reset = qcom_smmu500_reset,
>>       .write_s2cr = qcom_smmu_write_s2cr,
>> +    .tlb_sync = qcom_smmu_tlb_sync,
>>   };
>>     static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
>> @@ -382,12 +447,84 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
>>       .reset = qcom_smmu500_reset,
>>       .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
>>       .write_sctlr = qcom_adreno_smmu_write_sctlr,
>> +    .tlb_sync = qcom_smmu_tlb_sync,
>> +};
>> +
>> +/* Implementation Defined Register Space 0 register offsets */
>> +static const u32 qcom_smmu_impl0_reg_offset[] = {
>> +    [QCOM_SMMU_TBU_PWR_STATUS]        = 0x2204,
>> +    [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]    = 0x25dc,
>> +    [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]    = 0x2670,
>> +};
>> +
>> +static const struct qcom_smmu_config qcm2290_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sc7180_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sc7280_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sc8180x_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm6125_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm6350_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm8150_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm8250_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm8350_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm8450_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
>> +    { .compatible = "qcom,msm8998-smmu-v2" },
>> +    { .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
>> +    { .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
>> +    { .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
>> +    { .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
>> +    { .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
>> +    { .compatible = "qcom,sdm630-smmu-v2" },
>> +    { .compatible = "qcom,sdm845-smmu-500" },
>> +    { .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
>> +    { .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
>> +    { .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
>> +    { .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
>> +    { .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
>> +    { .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
>> +    { }
>>   };
>>     static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>>           const struct arm_smmu_impl *impl)
>>   {
>>       struct qcom_smmu *qsmmu;
>> +    const struct of_device_id *match;
>> +    const struct device_node *np = smmu->dev->of_node;
>>         /* Check to make sure qcom_scm has finished probing */
>>       if (!qcom_scm_is_available())
>> @@ -398,28 +535,16 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>>           return ERR_PTR(-ENOMEM);
>>         qsmmu->smmu.impl = impl;
>> +    match = of_match_node(qcom_smmu_impl_of_match, np);
>> +    if (!match)
>> +        goto out;
>> +
>> +    qsmmu->cfg = match->data;
>
> I haven't been the of_device_get_match_data() police for quite some time now, but it's never too late :)
>

I did check that :) and it doesn't work in our case as we need data from qcom_smmu_impl_of_match[]
table.

>> +out:
>>       return &qsmmu->smmu;
>>   }
>>   -static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
>> -    { .compatible = "qcom,msm8998-smmu-v2" },
>> -    { .compatible = "qcom,qcm2290-smmu-500" },
>> -    { .compatible = "qcom,sc7180-smmu-500" },
>> -    { .compatible = "qcom,sc7280-smmu-500" },
>> -    { .compatible = "qcom,sc8180x-smmu-500" },
>> -    { .compatible = "qcom,sc8280xp-smmu-500" },
>> -    { .compatible = "qcom,sdm630-smmu-v2" },
>> -    { .compatible = "qcom,sdm845-smmu-500" },
>> -    { .compatible = "qcom,sm6125-smmu-500" },
>> -    { .compatible = "qcom,sm6350-smmu-500" },
>> -    { .compatible = "qcom,sm8150-smmu-500" },
>> -    { .compatible = "qcom,sm8250-smmu-500" },
>> -    { .compatible = "qcom,sm8350-smmu-500" },
>> -    { .compatible = "qcom,sm8450-smmu-500" },
>> -    { }
>> -};
>> -
>>   #ifdef CONFIG_ACPI
>>   static struct acpi_platform_list qcom_acpi_platlist[] = {
>>       { "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> index 2ed3594f384e..4c5b51109835 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> @@ -2099,6 +2099,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>>       if (IS_ERR(smmu->base))
>>           return PTR_ERR(smmu->base);
>>       ioaddr = res->start;
>> +    smmu->ioaddr = ioaddr;
>
> It slightly bothers me to add something to the common structure that's only needed by some weird imp-def feature, but there's plenty of wasted space in there already, and I suppose it is information that the core driver does at least use in passing, so overall I think that's a resounding "meh". Maybe remove the local variable entirely to make it look less redundant?
>

Sure, will remove the local variable.

Thanks,
Sai

> Thanks,
> Robin.
>
>> +
>>       /*
>>        * The resource size should effectively match the value of SMMU_TOP;
>>        * stash that temporarily until we know PAGESIZE to validate it with.
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> index 2b9b42fb6f30..703fd5817ec1 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> @@ -278,6 +278,7 @@ struct arm_smmu_device {
>>       struct device            *dev;
>>         void __iomem            *base;
>> +    phys_addr_t            ioaddr;
>>       unsigned int            numpage;
>>       unsigned int            pgshift;


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts
@ 2022-07-07  7:53     ` Sai Prakash Ranjan
  0 siblings, 0 replies; 25+ messages in thread
From: Sai Prakash Ranjan @ 2022-07-07  7:53 UTC (permalink / raw)
  To: Robin Murphy, Will Deacon, Joerg Roedel
  Cc: iommu, linux-arm-kernel, linux-kernel, linux-arm-msm,
	quic_guptap, Rob Clark, Bjorn Andersson

Hi Robin,

On 7/6/2022 10:15 PM, Robin Murphy wrote:
> On 2022-05-26 05:14, Sai Prakash Ranjan wrote:
>> TLB sync timeouts can be due to various reasons such as TBU power down
>> or pending TCU/TBU invalidation/sync and so on. Debugging these often
>> require dumping of some implementation defined registers to know the
>> status of TBU/TCU operations and some of these registers are not
>> accessible in non-secure world such as from kernel and requires SMC
>> calls to read them in the secure world. So, add this debug support
>> to dump implementation defined registers for TLB sync timeout issues.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>
>> Changes in v2:
>>   * Use scm call consistently so that it works on older chipsets where
>>     some of these regs are secure registers.
>>   * Add device specific data to get the implementation defined register
>>     offsets.
>>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 161 ++++++++++++++++++---
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c      |   2 +
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |   1 +
>>   3 files changed, 146 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> index 7820711c4560..bb68aa85b28b 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> @@ -5,13 +5,27 @@
>>     #include <linux/acpi.h>
>>   #include <linux/adreno-smmu-priv.h>
>> +#include <linux/delay.h>
>>   #include <linux/of_device.h>
>>   #include <linux/qcom_scm.h>
>>     #include "arm-smmu.h"
>>   +#define QCOM_DUMMY_VAL    -1
>> +
>> +enum qcom_smmu_impl_reg_offset {
>> +    QCOM_SMMU_TBU_PWR_STATUS,
>> +    QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
>> +    QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
>> +};
>> +
>> +struct qcom_smmu_config {
>> +    const u32 *reg_offset;
>> +};
>> +
>>   struct qcom_smmu {
>>       struct arm_smmu_device smmu;
>> +    const struct qcom_smmu_config *cfg;
>>       bool bypass_quirk;
>>       u8 bypass_cbndx;
>>       u32 stall_enabled;
>> @@ -22,6 +36,56 @@ static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>       return container_of(smmu, struct qcom_smmu, smmu);
>>   }
>>   +static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
>> +                int sync, int status)
>> +{
>> +    int ret;
>> +    unsigned int spin_cnt, delay;
>> +    u32 reg, tbu_pwr_status, sync_inv_ack, sync_inv_progress;
>> +    struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
>> +    const struct qcom_smmu_config *cfg;
>> +
>> +    arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
>> +    for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
>> +        for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
>> +            reg = arm_smmu_readl(smmu, page, status);
>> +            if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
>> +                return;
>> +            cpu_relax();
>> +        }
>> +        udelay(delay);
>> +    }
>> +
>> +    dev_err_ratelimited(smmu->dev,
>> +                "TLB sync timed out -- SMMU may be deadlocked\n");
>
> Maybe consider a single ratelimit state for the whole function so all the output stays together. If things go sufficiently wrong, mixed up bits of partial output from different events may be misleadingly unhelpful (and at the very least it'll be up to 5x more effective at the intent of limiting log spam).
>

Right, makes sense. Will change it.

>> +    cfg = qsmmu->cfg;
>> +    if (!cfg)
>> +        return;
>> +
>> +    ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
>> +                &tbu_pwr_status);
>> +    if (ret)
>> +        dev_err_ratelimited(smmu->dev,
>> +                    "Failed to read TBU power status: %d\n", ret);
>> +
>> +    ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
>> +                &sync_inv_ack);
>> +    if (ret)
>> +        dev_err_ratelimited(smmu->dev,
>> +                    "Failed to read TBU sync/inv ack status: %d\n", ret);
>> +
>> +    ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
>> +                &sync_inv_progress);
>> +    if (ret)
>> +        dev_err_ratelimited(smmu->dev,
>> +                    "Failed to read TCU syn/inv progress: %d\n", ret);
>> +
>> +    dev_err_ratelimited(smmu->dev,
>> +                "TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
>> +                tbu_pwr_status, sync_inv_ack, sync_inv_progress);
>> +}
>> +
>>   static void qcom_adreno_smmu_write_sctlr(struct arm_smmu_device *smmu, int idx,
>>           u32 reg)
>>   {
>> @@ -374,6 +438,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
>>       .def_domain_type = qcom_smmu_def_domain_type,
>>       .reset = qcom_smmu500_reset,
>>       .write_s2cr = qcom_smmu_write_s2cr,
>> +    .tlb_sync = qcom_smmu_tlb_sync,
>>   };
>>     static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
>> @@ -382,12 +447,84 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
>>       .reset = qcom_smmu500_reset,
>>       .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
>>       .write_sctlr = qcom_adreno_smmu_write_sctlr,
>> +    .tlb_sync = qcom_smmu_tlb_sync,
>> +};
>> +
>> +/* Implementation Defined Register Space 0 register offsets */
>> +static const u32 qcom_smmu_impl0_reg_offset[] = {
>> +    [QCOM_SMMU_TBU_PWR_STATUS]        = 0x2204,
>> +    [QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]    = 0x25dc,
>> +    [QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]    = 0x2670,
>> +};
>> +
>> +static const struct qcom_smmu_config qcm2290_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sc7180_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sc7280_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sc8180x_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm6125_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm6350_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm8150_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm8250_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm8350_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct qcom_smmu_config sm8450_smmu_cfg = {
>> +    .reg_offset = qcom_smmu_impl0_reg_offset,
>> +};
>> +
>> +static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
>> +    { .compatible = "qcom,msm8998-smmu-v2" },
>> +    { .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
>> +    { .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
>> +    { .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
>> +    { .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
>> +    { .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
>> +    { .compatible = "qcom,sdm630-smmu-v2" },
>> +    { .compatible = "qcom,sdm845-smmu-500" },
>> +    { .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
>> +    { .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
>> +    { .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
>> +    { .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
>> +    { .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
>> +    { .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
>> +    { }
>>   };
>>     static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>>           const struct arm_smmu_impl *impl)
>>   {
>>       struct qcom_smmu *qsmmu;
>> +    const struct of_device_id *match;
>> +    const struct device_node *np = smmu->dev->of_node;
>>         /* Check to make sure qcom_scm has finished probing */
>>       if (!qcom_scm_is_available())
>> @@ -398,28 +535,16 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
>>           return ERR_PTR(-ENOMEM);
>>         qsmmu->smmu.impl = impl;
>> +    match = of_match_node(qcom_smmu_impl_of_match, np);
>> +    if (!match)
>> +        goto out;
>> +
>> +    qsmmu->cfg = match->data;
>
> I haven't been the of_device_get_match_data() police for quite some time now, but it's never too late :)
>

I did check that :) and it doesn't work in our case as we need data from qcom_smmu_impl_of_match[]
table.

>> +out:
>>       return &qsmmu->smmu;
>>   }
>>   -static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
>> -    { .compatible = "qcom,msm8998-smmu-v2" },
>> -    { .compatible = "qcom,qcm2290-smmu-500" },
>> -    { .compatible = "qcom,sc7180-smmu-500" },
>> -    { .compatible = "qcom,sc7280-smmu-500" },
>> -    { .compatible = "qcom,sc8180x-smmu-500" },
>> -    { .compatible = "qcom,sc8280xp-smmu-500" },
>> -    { .compatible = "qcom,sdm630-smmu-v2" },
>> -    { .compatible = "qcom,sdm845-smmu-500" },
>> -    { .compatible = "qcom,sm6125-smmu-500" },
>> -    { .compatible = "qcom,sm6350-smmu-500" },
>> -    { .compatible = "qcom,sm8150-smmu-500" },
>> -    { .compatible = "qcom,sm8250-smmu-500" },
>> -    { .compatible = "qcom,sm8350-smmu-500" },
>> -    { .compatible = "qcom,sm8450-smmu-500" },
>> -    { }
>> -};
>> -
>>   #ifdef CONFIG_ACPI
>>   static struct acpi_platform_list qcom_acpi_platlist[] = {
>>       { "LENOVO", "CB-01   ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> index 2ed3594f384e..4c5b51109835 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> @@ -2099,6 +2099,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>>       if (IS_ERR(smmu->base))
>>           return PTR_ERR(smmu->base);
>>       ioaddr = res->start;
>> +    smmu->ioaddr = ioaddr;
>
> It slightly bothers me to add something to the common structure that's only needed by some weird imp-def feature, but there's plenty of wasted space in there already, and I suppose it is information that the core driver does at least use in passing, so overall I think that's a resounding "meh". Maybe remove the local variable entirely to make it look less redundant?
>

Sure, will remove the local variable.

Thanks,
Sai

> Thanks,
> Robin.
>
>> +
>>       /*
>>        * The resource size should effectively match the value of SMMU_TOP;
>>        * stash that temporarily until we know PAGESIZE to validate it with.
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> index 2b9b42fb6f30..703fd5817ec1 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> @@ -278,6 +278,7 @@ struct arm_smmu_device {
>>       struct device            *dev;
>>         void __iomem            *base;
>> +    phys_addr_t            ioaddr;
>>       unsigned int            numpage;
>>       unsigned int            pgshift;


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2022-07-07  7:54 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-26  4:14 [PATCHv2] iommu/arm-smmu-qcom: Add debug support for TLB sync timeouts Sai Prakash Ranjan
2022-05-26  4:14 ` Sai Prakash Ranjan
2022-05-26  4:14 ` Sai Prakash Ranjan
2022-06-08 21:22 ` Vincent Knecht
2022-06-08 21:22   ` Vincent Knecht
2022-06-08 21:22   ` Vincent Knecht
2022-06-09  8:00   ` Sai Prakash Ranjan
2022-06-09  8:00     ` Sai Prakash Ranjan
2022-06-09  8:00     ` Sai Prakash Ranjan
2022-06-23  6:02 ` Sai Prakash Ranjan
2022-06-23  6:02   ` Sai Prakash Ranjan
2022-06-23  6:02   ` Sai Prakash Ranjan
2022-07-05  5:05   ` Sai Prakash Ranjan
2022-07-05  5:05     ` Sai Prakash Ranjan
2022-07-05  5:05     ` Sai Prakash Ranjan
2022-07-06 11:56 ` Will Deacon
2022-07-06 11:56   ` Will Deacon
2022-07-06 11:56   ` Will Deacon
2022-07-07  6:20   ` Sai Prakash Ranjan
2022-07-07  6:20     ` Sai Prakash Ranjan
2022-07-06 16:45 ` Robin Murphy
2022-07-06 16:45   ` Robin Murphy
2022-07-06 16:45   ` Robin Murphy
2022-07-07  7:53   ` Sai Prakash Ranjan
2022-07-07  7:53     ` Sai Prakash Ranjan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.