All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-17  9:04 ` Ashish Mhetre via iommu
  0 siblings, 0 replies; 12+ messages in thread
From: Ashish Mhetre @ 2022-04-17  9:04 UTC (permalink / raw)
  To: thierry.reding, vdumpa, will, robin.murphy, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: nicolinc, Snikam, Ashish Mhetre, Pritesh Raithatha

Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
entries to not be invalidated correctly. The problem is that the walk
cache index generated for IOVA is not same across translation and
invalidation requests. This is leading to page faults when PMD entry is
released during unmap and populated with new PTE table during subsequent
map request. Disabling large page mappings avoids the release of PMD
entry and avoid translations seeing stale PMD entry in walk cache.
Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
Tegra234 devices. This is recommended fix from Tegra hardware design
team.

Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
 drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
 3 files changed, 27 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
index 01e9b50b10a1..b7a3d06da2f4 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
@@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
 			dev_name(dev), err);
 }
 
+static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
+{
+	const struct device_node *np = smmu->dev->of_node;
+
+	/*
+	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
+	 * entries to not be invalidated correctly. The problem is that the walk
+	 * cache index generated for IOVA is not same across translation and
+	 * invalidation requests. This is leading to page faults when PMD entry
+	 * is released during unmap and populated with new PTE table during
+	 * subsequent map request. Disabling large page mappings avoids the
+	 * release of PMD entry and avoid translations seeing stale PMD entry in
+	 * walk cache.
+	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
+	 * Tegra234.
+	 */
+	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
+	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
+		smmu->pgsize_bitmap = PAGE_SIZE;
+}
+
 static const struct arm_smmu_impl nvidia_smmu_impl = {
 	.read_reg = nvidia_smmu_read_reg,
 	.write_reg = nvidia_smmu_write_reg,
@@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
 	.global_fault = nvidia_smmu_global_fault,
 	.context_fault = nvidia_smmu_context_fault,
 	.probe_finalize = nvidia_smmu_probe_finalize,
+	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
 };
 
 static const struct arm_smmu_impl nvidia_smmu_single_impl = {
 	.probe_finalize = nvidia_smmu_probe_finalize,
+	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
 };
 
 struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 568cce590ccc..3692a19a588a 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
 
+	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
+		smmu->impl->cfg_pgsize_bitmap(smmu);
+
 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
 	else
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 2b9b42fb6f30..5d9b03024969 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -442,6 +442,7 @@ struct arm_smmu_impl {
 	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
 	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
 	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
+	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
 };
 
 #define INVALID_SMENDX			-1
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-17  9:04 ` Ashish Mhetre via iommu
  0 siblings, 0 replies; 12+ messages in thread
From: Ashish Mhetre via iommu @ 2022-04-17  9:04 UTC (permalink / raw)
  To: thierry.reding, vdumpa, will, robin.murphy, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: Ashish Mhetre, Snikam, Pritesh Raithatha

Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
entries to not be invalidated correctly. The problem is that the walk
cache index generated for IOVA is not same across translation and
invalidation requests. This is leading to page faults when PMD entry is
released during unmap and populated with new PTE table during subsequent
map request. Disabling large page mappings avoids the release of PMD
entry and avoid translations seeing stale PMD entry in walk cache.
Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
Tegra234 devices. This is recommended fix from Tegra hardware design
team.

Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
 drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
 3 files changed, 27 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
index 01e9b50b10a1..b7a3d06da2f4 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
@@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
 			dev_name(dev), err);
 }
 
+static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
+{
+	const struct device_node *np = smmu->dev->of_node;
+
+	/*
+	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
+	 * entries to not be invalidated correctly. The problem is that the walk
+	 * cache index generated for IOVA is not same across translation and
+	 * invalidation requests. This is leading to page faults when PMD entry
+	 * is released during unmap and populated with new PTE table during
+	 * subsequent map request. Disabling large page mappings avoids the
+	 * release of PMD entry and avoid translations seeing stale PMD entry in
+	 * walk cache.
+	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
+	 * Tegra234.
+	 */
+	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
+	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
+		smmu->pgsize_bitmap = PAGE_SIZE;
+}
+
 static const struct arm_smmu_impl nvidia_smmu_impl = {
 	.read_reg = nvidia_smmu_read_reg,
 	.write_reg = nvidia_smmu_write_reg,
@@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
 	.global_fault = nvidia_smmu_global_fault,
 	.context_fault = nvidia_smmu_context_fault,
 	.probe_finalize = nvidia_smmu_probe_finalize,
+	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
 };
 
 static const struct arm_smmu_impl nvidia_smmu_single_impl = {
 	.probe_finalize = nvidia_smmu_probe_finalize,
+	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
 };
 
 struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 568cce590ccc..3692a19a588a 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
 
+	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
+		smmu->impl->cfg_pgsize_bitmap(smmu);
+
 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
 	else
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 2b9b42fb6f30..5d9b03024969 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -442,6 +442,7 @@ struct arm_smmu_impl {
 	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
 	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
 	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
+	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
 };
 
 #define INVALID_SMENDX			-1
-- 
2.17.1

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-17  9:04 ` Ashish Mhetre via iommu
  0 siblings, 0 replies; 12+ messages in thread
From: Ashish Mhetre @ 2022-04-17  9:04 UTC (permalink / raw)
  To: thierry.reding, vdumpa, will, robin.murphy, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: nicolinc, Snikam, Ashish Mhetre, Pritesh Raithatha

Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
entries to not be invalidated correctly. The problem is that the walk
cache index generated for IOVA is not same across translation and
invalidation requests. This is leading to page faults when PMD entry is
released during unmap and populated with new PTE table during subsequent
map request. Disabling large page mappings avoids the release of PMD
entry and avoid translations seeing stale PMD entry in walk cache.
Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
Tegra234 devices. This is recommended fix from Tegra hardware design
team.

Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
 drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
 drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
 3 files changed, 27 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
index 01e9b50b10a1..b7a3d06da2f4 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
@@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
 			dev_name(dev), err);
 }
 
+static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
+{
+	const struct device_node *np = smmu->dev->of_node;
+
+	/*
+	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
+	 * entries to not be invalidated correctly. The problem is that the walk
+	 * cache index generated for IOVA is not same across translation and
+	 * invalidation requests. This is leading to page faults when PMD entry
+	 * is released during unmap and populated with new PTE table during
+	 * subsequent map request. Disabling large page mappings avoids the
+	 * release of PMD entry and avoid translations seeing stale PMD entry in
+	 * walk cache.
+	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
+	 * Tegra234.
+	 */
+	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
+	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
+		smmu->pgsize_bitmap = PAGE_SIZE;
+}
+
 static const struct arm_smmu_impl nvidia_smmu_impl = {
 	.read_reg = nvidia_smmu_read_reg,
 	.write_reg = nvidia_smmu_write_reg,
@@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
 	.global_fault = nvidia_smmu_global_fault,
 	.context_fault = nvidia_smmu_context_fault,
 	.probe_finalize = nvidia_smmu_probe_finalize,
+	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
 };
 
 static const struct arm_smmu_impl nvidia_smmu_single_impl = {
 	.probe_finalize = nvidia_smmu_probe_finalize,
+	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
 };
 
 struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 568cce590ccc..3692a19a588a 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
 
+	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
+		smmu->impl->cfg_pgsize_bitmap(smmu);
+
 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
 	else
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 2b9b42fb6f30..5d9b03024969 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -442,6 +442,7 @@ struct arm_smmu_impl {
 	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
 	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
 	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
+	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
 };
 
 #define INVALID_SMENDX			-1
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
  2022-04-17  9:04 ` Ashish Mhetre via iommu
  (?)
@ 2022-04-19 12:51   ` Jon Hunter via iommu
  -1 siblings, 0 replies; 12+ messages in thread
From: Jon Hunter @ 2022-04-19 12:51 UTC (permalink / raw)
  To: Ashish Mhetre, thierry.reding, vdumpa, will, robin.murphy, joro,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: nicolinc, Snikam, Pritesh Raithatha


On 17/04/2022 10:04, Ashish Mhetre wrote:
> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> entries to not be invalidated correctly. The problem is that the walk
> cache index generated for IOVA is not same across translation and
> invalidation requests. This is leading to page faults when PMD entry is
> released during unmap and populated with new PTE table during subsequent
> map request. Disabling large page mappings avoids the release of PMD
> entry and avoid translations seeing stale PMD entry in walk cache.
> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
> Tegra234 devices. This is recommended fix from Tegra hardware design
> team.
> 
> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>   3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> index 01e9b50b10a1..b7a3d06da2f4 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
>   			dev_name(dev), err);
>   }
>   
> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
> +{
> +	const struct device_node *np = smmu->dev->of_node;
> +
> +	/*
> +	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> +	 * entries to not be invalidated correctly. The problem is that the walk
> +	 * cache index generated for IOVA is not same across translation and
> +	 * invalidation requests. This is leading to page faults when PMD entry
> +	 * is released during unmap and populated with new PTE table during
> +	 * subsequent map request. Disabling large page mappings avoids the
> +	 * release of PMD entry and avoid translations seeing stale PMD entry in
> +	 * walk cache.
> +	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
> +	 * Tegra234.
> +	 */
> +	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
> +	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
> +		smmu->pgsize_bitmap = PAGE_SIZE;
> +}
> +
>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.read_reg = nvidia_smmu_read_reg,
>   	.write_reg = nvidia_smmu_write_reg,
> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.global_fault = nvidia_smmu_global_fault,
>   	.context_fault = nvidia_smmu_context_fault,
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 568cce590ccc..3692a19a588a 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>   	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>   		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>   
> +	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
> +		smmu->impl->cfg_pgsize_bitmap(smmu);
> +
>   	if (arm_smmu_ops.pgsize_bitmap == -1UL)
>   		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>   	else
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..5d9b03024969 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>   	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>   	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
>   	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
> +	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>   };
>   
>   #define INVALID_SMENDX			-1


Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>

Thanks!
Jon

-- 
nvpublic

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-19 12:51   ` Jon Hunter via iommu
  0 siblings, 0 replies; 12+ messages in thread
From: Jon Hunter via iommu @ 2022-04-19 12:51 UTC (permalink / raw)
  To: Ashish Mhetre, thierry.reding, vdumpa, will, robin.murphy, joro,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: Snikam, Pritesh Raithatha


On 17/04/2022 10:04, Ashish Mhetre wrote:
> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> entries to not be invalidated correctly. The problem is that the walk
> cache index generated for IOVA is not same across translation and
> invalidation requests. This is leading to page faults when PMD entry is
> released during unmap and populated with new PTE table during subsequent
> map request. Disabling large page mappings avoids the release of PMD
> entry and avoid translations seeing stale PMD entry in walk cache.
> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
> Tegra234 devices. This is recommended fix from Tegra hardware design
> team.
> 
> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>   3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> index 01e9b50b10a1..b7a3d06da2f4 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
>   			dev_name(dev), err);
>   }
>   
> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
> +{
> +	const struct device_node *np = smmu->dev->of_node;
> +
> +	/*
> +	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> +	 * entries to not be invalidated correctly. The problem is that the walk
> +	 * cache index generated for IOVA is not same across translation and
> +	 * invalidation requests. This is leading to page faults when PMD entry
> +	 * is released during unmap and populated with new PTE table during
> +	 * subsequent map request. Disabling large page mappings avoids the
> +	 * release of PMD entry and avoid translations seeing stale PMD entry in
> +	 * walk cache.
> +	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
> +	 * Tegra234.
> +	 */
> +	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
> +	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
> +		smmu->pgsize_bitmap = PAGE_SIZE;
> +}
> +
>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.read_reg = nvidia_smmu_read_reg,
>   	.write_reg = nvidia_smmu_write_reg,
> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.global_fault = nvidia_smmu_global_fault,
>   	.context_fault = nvidia_smmu_context_fault,
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 568cce590ccc..3692a19a588a 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>   	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>   		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>   
> +	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
> +		smmu->impl->cfg_pgsize_bitmap(smmu);
> +
>   	if (arm_smmu_ops.pgsize_bitmap == -1UL)
>   		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>   	else
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..5d9b03024969 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>   	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>   	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
>   	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
> +	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>   };
>   
>   #define INVALID_SMENDX			-1


Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>

Thanks!
Jon

-- 
nvpublic
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-19 12:51   ` Jon Hunter via iommu
  0 siblings, 0 replies; 12+ messages in thread
From: Jon Hunter @ 2022-04-19 12:51 UTC (permalink / raw)
  To: Ashish Mhetre, thierry.reding, vdumpa, will, robin.murphy, joro,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: nicolinc, Snikam, Pritesh Raithatha


On 17/04/2022 10:04, Ashish Mhetre wrote:
> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> entries to not be invalidated correctly. The problem is that the walk
> cache index generated for IOVA is not same across translation and
> invalidation requests. This is leading to page faults when PMD entry is
> released during unmap and populated with new PTE table during subsequent
> map request. Disabling large page mappings avoids the release of PMD
> entry and avoid translations seeing stale PMD entry in walk cache.
> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
> Tegra234 devices. This is recommended fix from Tegra hardware design
> team.
> 
> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>   3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> index 01e9b50b10a1..b7a3d06da2f4 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
>   			dev_name(dev), err);
>   }
>   
> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
> +{
> +	const struct device_node *np = smmu->dev->of_node;
> +
> +	/*
> +	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> +	 * entries to not be invalidated correctly. The problem is that the walk
> +	 * cache index generated for IOVA is not same across translation and
> +	 * invalidation requests. This is leading to page faults when PMD entry
> +	 * is released during unmap and populated with new PTE table during
> +	 * subsequent map request. Disabling large page mappings avoids the
> +	 * release of PMD entry and avoid translations seeing stale PMD entry in
> +	 * walk cache.
> +	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
> +	 * Tegra234.
> +	 */
> +	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
> +	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
> +		smmu->pgsize_bitmap = PAGE_SIZE;
> +}
> +
>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.read_reg = nvidia_smmu_read_reg,
>   	.write_reg = nvidia_smmu_write_reg,
> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.global_fault = nvidia_smmu_global_fault,
>   	.context_fault = nvidia_smmu_context_fault,
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 568cce590ccc..3692a19a588a 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>   	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>   		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>   
> +	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
> +		smmu->impl->cfg_pgsize_bitmap(smmu);
> +
>   	if (arm_smmu_ops.pgsize_bitmap == -1UL)
>   		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>   	else
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..5d9b03024969 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>   	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>   	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
>   	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
> +	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>   };
>   
>   #define INVALID_SMENDX			-1


Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>

Thanks!
Jon

-- 
nvpublic

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
  2022-04-17  9:04 ` Ashish Mhetre via iommu
  (?)
@ 2022-04-19 20:27   ` Robin Murphy
  -1 siblings, 0 replies; 12+ messages in thread
From: Robin Murphy @ 2022-04-19 20:27 UTC (permalink / raw)
  To: Ashish Mhetre, thierry.reding, vdumpa, will, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: nicolinc, Snikam, Pritesh Raithatha

On 2022-04-17 10:04, Ashish Mhetre wrote:
> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> entries to not be invalidated correctly. The problem is that the walk
> cache index generated for IOVA is not same across translation and
> invalidation requests. This is leading to page faults when PMD entry is
> released during unmap and populated with new PTE table during subsequent
> map request. Disabling large page mappings avoids the release of PMD
> entry and avoid translations seeing stale PMD entry in walk cache.
> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
> Tegra234 devices. This is recommended fix from Tegra hardware design
> team.

Is this related to any of the several known MMU-500 invalidation errata, 
or is it definitely specific to something NVIDIA have done with their 
integration?

> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>   3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> index 01e9b50b10a1..b7a3d06da2f4 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
>   			dev_name(dev), err);
>   }
>   
> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
> +{
> +	const struct device_node *np = smmu->dev->of_node;
> +
> +	/*
> +	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> +	 * entries to not be invalidated correctly. The problem is that the walk
> +	 * cache index generated for IOVA is not same across translation and
> +	 * invalidation requests. This is leading to page faults when PMD entry
> +	 * is released during unmap and populated with new PTE table during
> +	 * subsequent map request. Disabling large page mappings avoids the
> +	 * release of PMD entry and avoid translations seeing stale PMD entry in
> +	 * walk cache.
> +	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
> +	 * Tegra234.
> +	 */
> +	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
> +	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
> +		smmu->pgsize_bitmap = PAGE_SIZE;
> +}
> +
>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.read_reg = nvidia_smmu_read_reg,
>   	.write_reg = nvidia_smmu_write_reg,
> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.global_fault = nvidia_smmu_global_fault,
>   	.context_fault = nvidia_smmu_context_fault,
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 568cce590ccc..3692a19a588a 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>   	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>   		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>   
> +	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
> +		smmu->impl->cfg_pgsize_bitmap(smmu);

I'm not the biggest fan of adding a super-specific hook for this, when 
it seems like it could just as easily be handled in the init_context 
hook, which is where it is precisely for the purpose of mangling the 
pgtable_cfg to influence io-pgtable's behaviour.

Thanks,
Robin.

> +
>   	if (arm_smmu_ops.pgsize_bitmap == -1UL)
>   		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>   	else
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..5d9b03024969 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>   	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>   	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
>   	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
> +	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>   };
>   
>   #define INVALID_SMENDX			-1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-19 20:27   ` Robin Murphy
  0 siblings, 0 replies; 12+ messages in thread
From: Robin Murphy @ 2022-04-19 20:27 UTC (permalink / raw)
  To: Ashish Mhetre, thierry.reding, vdumpa, will, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: Snikam, Pritesh Raithatha

On 2022-04-17 10:04, Ashish Mhetre wrote:
> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> entries to not be invalidated correctly. The problem is that the walk
> cache index generated for IOVA is not same across translation and
> invalidation requests. This is leading to page faults when PMD entry is
> released during unmap and populated with new PTE table during subsequent
> map request. Disabling large page mappings avoids the release of PMD
> entry and avoid translations seeing stale PMD entry in walk cache.
> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
> Tegra234 devices. This is recommended fix from Tegra hardware design
> team.

Is this related to any of the several known MMU-500 invalidation errata, 
or is it definitely specific to something NVIDIA have done with their 
integration?

> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>   3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> index 01e9b50b10a1..b7a3d06da2f4 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
>   			dev_name(dev), err);
>   }
>   
> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
> +{
> +	const struct device_node *np = smmu->dev->of_node;
> +
> +	/*
> +	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> +	 * entries to not be invalidated correctly. The problem is that the walk
> +	 * cache index generated for IOVA is not same across translation and
> +	 * invalidation requests. This is leading to page faults when PMD entry
> +	 * is released during unmap and populated with new PTE table during
> +	 * subsequent map request. Disabling large page mappings avoids the
> +	 * release of PMD entry and avoid translations seeing stale PMD entry in
> +	 * walk cache.
> +	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
> +	 * Tegra234.
> +	 */
> +	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
> +	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
> +		smmu->pgsize_bitmap = PAGE_SIZE;
> +}
> +
>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.read_reg = nvidia_smmu_read_reg,
>   	.write_reg = nvidia_smmu_write_reg,
> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.global_fault = nvidia_smmu_global_fault,
>   	.context_fault = nvidia_smmu_context_fault,
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 568cce590ccc..3692a19a588a 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>   	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>   		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>   
> +	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
> +		smmu->impl->cfg_pgsize_bitmap(smmu);

I'm not the biggest fan of adding a super-specific hook for this, when 
it seems like it could just as easily be handled in the init_context 
hook, which is where it is precisely for the purpose of mangling the 
pgtable_cfg to influence io-pgtable's behaviour.

Thanks,
Robin.

> +
>   	if (arm_smmu_ops.pgsize_bitmap == -1UL)
>   		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>   	else
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..5d9b03024969 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>   	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>   	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
>   	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
> +	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>   };
>   
>   #define INVALID_SMENDX			-1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-19 20:27   ` Robin Murphy
  0 siblings, 0 replies; 12+ messages in thread
From: Robin Murphy @ 2022-04-19 20:27 UTC (permalink / raw)
  To: Ashish Mhetre, thierry.reding, vdumpa, will, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: nicolinc, Snikam, Pritesh Raithatha

On 2022-04-17 10:04, Ashish Mhetre wrote:
> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> entries to not be invalidated correctly. The problem is that the walk
> cache index generated for IOVA is not same across translation and
> invalidation requests. This is leading to page faults when PMD entry is
> released during unmap and populated with new PTE table during subsequent
> map request. Disabling large page mappings avoids the release of PMD
> entry and avoid translations seeing stale PMD entry in walk cache.
> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
> Tegra234 devices. This is recommended fix from Tegra hardware design
> team.

Is this related to any of the several known MMU-500 invalidation errata, 
or is it definitely specific to something NVIDIA have done with their 
integration?

> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>   3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> index 01e9b50b10a1..b7a3d06da2f4 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
>   			dev_name(dev), err);
>   }
>   
> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
> +{
> +	const struct device_node *np = smmu->dev->of_node;
> +
> +	/*
> +	 * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
> +	 * entries to not be invalidated correctly. The problem is that the walk
> +	 * cache index generated for IOVA is not same across translation and
> +	 * invalidation requests. This is leading to page faults when PMD entry
> +	 * is released during unmap and populated with new PTE table during
> +	 * subsequent map request. Disabling large page mappings avoids the
> +	 * release of PMD entry and avoid translations seeing stale PMD entry in
> +	 * walk cache.
> +	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
> +	 * Tegra234.
> +	 */
> +	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
> +	    of_device_is_compatible(np, "nvidia,tegra194-smmu"))
> +		smmu->pgsize_bitmap = PAGE_SIZE;
> +}
> +
>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.read_reg = nvidia_smmu_read_reg,
>   	.write_reg = nvidia_smmu_write_reg,
> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
>   	.global_fault = nvidia_smmu_global_fault,
>   	.context_fault = nvidia_smmu_context_fault,
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>   	.probe_finalize = nvidia_smmu_probe_finalize,
> +	.cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>   };
>   
>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index 568cce590ccc..3692a19a588a 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>   	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>   		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>   
> +	if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
> +		smmu->impl->cfg_pgsize_bitmap(smmu);

I'm not the biggest fan of adding a super-specific hook for this, when 
it seems like it could just as easily be handled in the init_context 
hook, which is where it is precisely for the purpose of mangling the 
pgtable_cfg to influence io-pgtable's behaviour.

Thanks,
Robin.

> +
>   	if (arm_smmu_ops.pgsize_bitmap == -1UL)
>   		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>   	else
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index 2b9b42fb6f30..5d9b03024969 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>   	void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>   	void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
>   	void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
> +	void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>   };
>   
>   #define INVALID_SMENDX			-1

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
  2022-04-19 20:27   ` Robin Murphy
  (?)
@ 2022-04-20  5:32     ` Ashish Mhetre via iommu
  -1 siblings, 0 replies; 12+ messages in thread
From: Ashish Mhetre @ 2022-04-20  5:32 UTC (permalink / raw)
  To: Robin Murphy, thierry.reding, vdumpa, will, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: nicolinc, Snikam, Pritesh Raithatha



On 4/20/2022 1:57 AM, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022-04-17 10:04, Ashish Mhetre wrote:
>> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
>> entries to not be invalidated correctly. The problem is that the walk
>> cache index generated for IOVA is not same across translation and
>> invalidation requests. This is leading to page faults when PMD entry is
>> released during unmap and populated with new PTE table during subsequent
>> map request. Disabling large page mappings avoids the release of PMD
>> entry and avoid translations seeing stale PMD entry in walk cache.
>> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
>> Tegra234 devices. This is recommended fix from Tegra hardware design
>> team.
> 
> Is this related to any of the several known MMU-500 invalidation errata,
> or is it definitely specific to something NVIDIA have done with their
> integration?
> 
It's not a known MMU-500 errata. It is specific to NVIDIA.

>> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
>> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
>> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>>   3 files changed, 27 insertions(+)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> index 01e9b50b10a1..b7a3d06da2f4 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct 
>> arm_smmu_device *smmu, struct devi
>>                       dev_name(dev), err);
>>   }
>>
>> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
>> +{
>> +     const struct device_node *np = smmu->dev->of_node;
>> +
>> +     /*
>> +      * Tegra194 and Tegra234 SoCs have the erratum that causes walk 
>> cache
>> +      * entries to not be invalidated correctly. The problem is that 
>> the walk
>> +      * cache index generated for IOVA is not same across translation 
>> and
>> +      * invalidation requests. This is leading to page faults when 
>> PMD entry
>> +      * is released during unmap and populated with new PTE table during
>> +      * subsequent map request. Disabling large page mappings avoids the
>> +      * release of PMD entry and avoid translations seeing stale PMD 
>> entry in
>> +      * walk cache.
>> +      * Fix this by limiting the page mappings to PAGE_SIZE on 
>> Tegra194 and
>> +      * Tegra234.
>> +      */
>> +     if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
>> +         of_device_is_compatible(np, "nvidia,tegra194-smmu"))
>> +             smmu->pgsize_bitmap = PAGE_SIZE;
>> +}
>> +
>>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>>       .read_reg = nvidia_smmu_read_reg,
>>       .write_reg = nvidia_smmu_write_reg,
>> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl 
>> nvidia_smmu_impl = {
>>       .global_fault = nvidia_smmu_global_fault,
>>       .context_fault = nvidia_smmu_context_fault,
>>       .probe_finalize = nvidia_smmu_probe_finalize,
>> +     .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>>   };
>>
>>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>>       .probe_finalize = nvidia_smmu_probe_finalize,
>> +     .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>>   };
>>
>>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device 
>> *smmu)
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> index 568cce590ccc..3692a19a588a 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct 
>> arm_smmu_device *smmu)
>>       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>>               smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>>
>> +     if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
>> +             smmu->impl->cfg_pgsize_bitmap(smmu);
> 
> I'm not the biggest fan of adding a super-specific hook for this, when
> it seems like it could just as easily be handled in the init_context
> hook, which is where it is precisely for the purpose of mangling the
> pgtable_cfg to influence io-pgtable's behaviour.
> 
Yes, we can use init_context() to override pgsize_bitmap. I'll update
that in next version.

> Thanks,
> Robin.
> 
>> +
>>       if (arm_smmu_ops.pgsize_bitmap == -1UL)
>>               arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>>       else
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> index 2b9b42fb6f30..5d9b03024969 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>>       void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>>       void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 
>> reg);
>>       void (*probe_finalize)(struct arm_smmu_device *smmu, struct 
>> device *dev);
>> +     void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>>   };
>>
>>   #define INVALID_SMENDX                      -1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-20  5:32     ` Ashish Mhetre via iommu
  0 siblings, 0 replies; 12+ messages in thread
From: Ashish Mhetre via iommu @ 2022-04-20  5:32 UTC (permalink / raw)
  To: Robin Murphy, thierry.reding, vdumpa, will, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: Snikam, Pritesh Raithatha



On 4/20/2022 1:57 AM, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022-04-17 10:04, Ashish Mhetre wrote:
>> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
>> entries to not be invalidated correctly. The problem is that the walk
>> cache index generated for IOVA is not same across translation and
>> invalidation requests. This is leading to page faults when PMD entry is
>> released during unmap and populated with new PTE table during subsequent
>> map request. Disabling large page mappings avoids the release of PMD
>> entry and avoid translations seeing stale PMD entry in walk cache.
>> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
>> Tegra234 devices. This is recommended fix from Tegra hardware design
>> team.
> 
> Is this related to any of the several known MMU-500 invalidation errata,
> or is it definitely specific to something NVIDIA have done with their
> integration?
> 
It's not a known MMU-500 errata. It is specific to NVIDIA.

>> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
>> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
>> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>>   3 files changed, 27 insertions(+)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> index 01e9b50b10a1..b7a3d06da2f4 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct 
>> arm_smmu_device *smmu, struct devi
>>                       dev_name(dev), err);
>>   }
>>
>> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
>> +{
>> +     const struct device_node *np = smmu->dev->of_node;
>> +
>> +     /*
>> +      * Tegra194 and Tegra234 SoCs have the erratum that causes walk 
>> cache
>> +      * entries to not be invalidated correctly. The problem is that 
>> the walk
>> +      * cache index generated for IOVA is not same across translation 
>> and
>> +      * invalidation requests. This is leading to page faults when 
>> PMD entry
>> +      * is released during unmap and populated with new PTE table during
>> +      * subsequent map request. Disabling large page mappings avoids the
>> +      * release of PMD entry and avoid translations seeing stale PMD 
>> entry in
>> +      * walk cache.
>> +      * Fix this by limiting the page mappings to PAGE_SIZE on 
>> Tegra194 and
>> +      * Tegra234.
>> +      */
>> +     if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
>> +         of_device_is_compatible(np, "nvidia,tegra194-smmu"))
>> +             smmu->pgsize_bitmap = PAGE_SIZE;
>> +}
>> +
>>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>>       .read_reg = nvidia_smmu_read_reg,
>>       .write_reg = nvidia_smmu_write_reg,
>> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl 
>> nvidia_smmu_impl = {
>>       .global_fault = nvidia_smmu_global_fault,
>>       .context_fault = nvidia_smmu_context_fault,
>>       .probe_finalize = nvidia_smmu_probe_finalize,
>> +     .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>>   };
>>
>>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>>       .probe_finalize = nvidia_smmu_probe_finalize,
>> +     .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>>   };
>>
>>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device 
>> *smmu)
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> index 568cce590ccc..3692a19a588a 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct 
>> arm_smmu_device *smmu)
>>       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>>               smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>>
>> +     if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
>> +             smmu->impl->cfg_pgsize_bitmap(smmu);
> 
> I'm not the biggest fan of adding a super-specific hook for this, when
> it seems like it could just as easily be handled in the init_context
> hook, which is where it is precisely for the purpose of mangling the
> pgtable_cfg to influence io-pgtable's behaviour.
> 
Yes, we can use init_context() to override pgsize_bitmap. I'll update
that in next version.

> Thanks,
> Robin.
> 
>> +
>>       if (arm_smmu_ops.pgsize_bitmap == -1UL)
>>               arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>>       else
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> index 2b9b42fb6f30..5d9b03024969 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>>       void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>>       void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 
>> reg);
>>       void (*probe_finalize)(struct arm_smmu_device *smmu, struct 
>> device *dev);
>> +     void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>>   };
>>
>>   #define INVALID_SMENDX                      -1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu
@ 2022-04-20  5:32     ` Ashish Mhetre via iommu
  0 siblings, 0 replies; 12+ messages in thread
From: Ashish Mhetre @ 2022-04-20  5:32 UTC (permalink / raw)
  To: Robin Murphy, thierry.reding, vdumpa, will, joro, jonathanh,
	linux-tegra, linux-arm-kernel, iommu, linux-kernel
  Cc: nicolinc, Snikam, Pritesh Raithatha



On 4/20/2022 1:57 AM, Robin Murphy wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 2022-04-17 10:04, Ashish Mhetre wrote:
>> Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
>> entries to not be invalidated correctly. The problem is that the walk
>> cache index generated for IOVA is not same across translation and
>> invalidation requests. This is leading to page faults when PMD entry is
>> released during unmap and populated with new PTE table during subsequent
>> map request. Disabling large page mappings avoids the release of PMD
>> entry and avoid translations seeing stale PMD entry in walk cache.
>> Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and
>> Tegra234 devices. This is recommended fix from Tegra hardware design
>> team.
> 
> Is this related to any of the several known MMU-500 invalidation errata,
> or is it definitely specific to something NVIDIA have done with their
> integration?
> 
It's not a known MMU-500 errata. It is specific to NVIDIA.

>> Co-developed-by: Pritesh Raithatha <praithatha@nvidia.com>
>> Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
>> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 23 ++++++++++++++++++++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.c        |  3 +++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h        |  1 +
>>   3 files changed, 27 insertions(+)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> index 01e9b50b10a1..b7a3d06da2f4 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
>> @@ -258,6 +258,27 @@ static void nvidia_smmu_probe_finalize(struct 
>> arm_smmu_device *smmu, struct devi
>>                       dev_name(dev), err);
>>   }
>>
>> +static void nvidia_smmu_cfg_pgsize_bitmap(struct arm_smmu_device *smmu)
>> +{
>> +     const struct device_node *np = smmu->dev->of_node;
>> +
>> +     /*
>> +      * Tegra194 and Tegra234 SoCs have the erratum that causes walk 
>> cache
>> +      * entries to not be invalidated correctly. The problem is that 
>> the walk
>> +      * cache index generated for IOVA is not same across translation 
>> and
>> +      * invalidation requests. This is leading to page faults when 
>> PMD entry
>> +      * is released during unmap and populated with new PTE table during
>> +      * subsequent map request. Disabling large page mappings avoids the
>> +      * release of PMD entry and avoid translations seeing stale PMD 
>> entry in
>> +      * walk cache.
>> +      * Fix this by limiting the page mappings to PAGE_SIZE on 
>> Tegra194 and
>> +      * Tegra234.
>> +      */
>> +     if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
>> +         of_device_is_compatible(np, "nvidia,tegra194-smmu"))
>> +             smmu->pgsize_bitmap = PAGE_SIZE;
>> +}
>> +
>>   static const struct arm_smmu_impl nvidia_smmu_impl = {
>>       .read_reg = nvidia_smmu_read_reg,
>>       .write_reg = nvidia_smmu_write_reg,
>> @@ -268,10 +289,12 @@ static const struct arm_smmu_impl 
>> nvidia_smmu_impl = {
>>       .global_fault = nvidia_smmu_global_fault,
>>       .context_fault = nvidia_smmu_context_fault,
>>       .probe_finalize = nvidia_smmu_probe_finalize,
>> +     .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>>   };
>>
>>   static const struct arm_smmu_impl nvidia_smmu_single_impl = {
>>       .probe_finalize = nvidia_smmu_probe_finalize,
>> +     .cfg_pgsize_bitmap = nvidia_smmu_cfg_pgsize_bitmap,
>>   };
>>
>>   struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device 
>> *smmu)
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> index 568cce590ccc..3692a19a588a 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>> @@ -1872,6 +1872,9 @@ static int arm_smmu_device_cfg_probe(struct 
>> arm_smmu_device *smmu)
>>       if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
>>               smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
>>
>> +     if (smmu->impl && smmu->impl->cfg_pgsize_bitmap)
>> +             smmu->impl->cfg_pgsize_bitmap(smmu);
> 
> I'm not the biggest fan of adding a super-specific hook for this, when
> it seems like it could just as easily be handled in the init_context
> hook, which is where it is precisely for the purpose of mangling the
> pgtable_cfg to influence io-pgtable's behaviour.
> 
Yes, we can use init_context() to override pgsize_bitmap. I'll update
that in next version.

> Thanks,
> Robin.
> 
>> +
>>       if (arm_smmu_ops.pgsize_bitmap == -1UL)
>>               arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>>       else
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h 
>> b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> index 2b9b42fb6f30..5d9b03024969 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> @@ -442,6 +442,7 @@ struct arm_smmu_impl {
>>       void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
>>       void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 
>> reg);
>>       void (*probe_finalize)(struct arm_smmu_device *smmu, struct 
>> device *dev);
>> +     void (*cfg_pgsize_bitmap)(struct arm_smmu_device *smmu);
>>   };
>>
>>   #define INVALID_SMENDX                      -1

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-04-20  5:33 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-17  9:04 [Patch v1] iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu Ashish Mhetre
2022-04-17  9:04 ` Ashish Mhetre
2022-04-17  9:04 ` Ashish Mhetre via iommu
2022-04-19 12:51 ` Jon Hunter
2022-04-19 12:51   ` Jon Hunter
2022-04-19 12:51   ` Jon Hunter via iommu
2022-04-19 20:27 ` Robin Murphy
2022-04-19 20:27   ` Robin Murphy
2022-04-19 20:27   ` Robin Murphy
2022-04-20  5:32   ` Ashish Mhetre
2022-04-20  5:32     ` Ashish Mhetre
2022-04-20  5:32     ` Ashish Mhetre via iommu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.